1use crate::core::{is_unique, RUMResult, RUMVec};
21use crate::types::RUMBuffer;
22use base64::prelude::*;
23use chardetng::EncodingDetector;
24use encoding_rs::Encoding;
25use std::cmp::min;
26pub use std::format as rumtk_format;
27use unicode_segmentation::UnicodeSegmentation;
/// Width of an escape-sequence window.
/// NOTE(review): not referenced in the visible part of this file; presumably the
/// byte length of a fixed-width sequence such as `\uXXXX` -- confirm before relying on it.
const ESCAPED_STRING_WINDOW: usize = 6;
/// Character that introduces an escape sequence (a single backslash).
const ASCII_ESCAPE_CHAR: char = '\\';
/// Lowest character considered printable ASCII (space).
const MIN_ASCII_READABLE: char = ' ';
/// Highest character considered printable ASCII (tilde).
const MAX_ASCII_READABLE: char = '~';
/// The empty string slice.
pub const EMPTY_STRING: &str = "";
/// A shared, empty owned string.
///
/// Built with `String::new()`, which is a `const fn`; `Default::default()` is a trait
/// method and cannot be evaluated in a `static` initializer on stable Rust.
pub static EMPTY_RUMSTRING: RUMString = RUMString::new();
/// A single dot.
pub const DOT_STR: &str = ".";
/// `Some` wrapping the empty string slice.
pub const EMPTY_STRING_OPTION: Option<&str> = Some("");
/// Every printable ASCII character, from space through tilde, in code-point order.
pub const READABLE_ASCII: &str = " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~";

/// Owned string type used throughout this module.
pub type RUMString = String;
/// A `(from, to)` pair of string slices.
pub type EscapeException<'a> = (&'a str, &'a str);
/// A borrowed list of [`EscapeException`] pairs.
pub type EscapeExceptions<'a> = &'a [EscapeException<'a>];
/// An unsized list of `(from, to)` replacement pairs; always used behind a reference.
pub type StringReplacementPair<'a> = [EscapeException<'a>];
/// A single grapheme cluster, borrowed from its source string.
pub type Grapheme<'a> = &'a str;
/// An owned sequence of graphemes.
pub type GraphemeStringView<'a> = RUMVec<Grapheme<'a>>;
/// A borrowed run of graphemes whose borrow (`'b`) may be shorter than the text (`'a`).
pub type GraphemeSlice<'b, 'a> = &'b [Grapheme<'a>];
/// A grapheme sequence used as a search pattern (slice and text share one lifetime).
pub type GraphemePattern<'a> = GraphemeSlice<'a, 'a>;
/// A `(left, right)` pair of grapheme patterns.
pub type GraphemePatternPair<'a> = (GraphemePattern<'a>, GraphemePattern<'a>);
49
50#[derive(Default, Debug, PartialEq, Clone)]
55pub struct GraphemeStr<'a> {
56 view: GraphemeStringView<'a>,
57 start: usize,
58 end: usize,
59}
60
61impl<'a> GraphemeStr<'a> {
62 pub fn from(string: &'a str) -> Self {
63 let view = string.graphemes(true).collect::<GraphemeStringView>();
64 Self::from_view(view)
65 }
66
67 pub fn from_view(view: GraphemeStringView<'a>) -> Self {
68 let start = 0;
69 let end = view.len();
70 Self { view, start, end }
71 }
72
73 pub fn at(&self, index: usize) -> Grapheme<'a> {
74 self.view[index]
75 }
76
77 pub fn trim(&self, pattern: &GraphemePatternPair<'a>) -> Self {
78 let (left_pattern, right_pattern) = pattern;
79 self.trim_left(left_pattern).trim_right(right_pattern)
80 }
81
82 pub fn trim_left(&self, pattern: &GraphemePattern<'a>) -> Self {
83 let new_offset = self.find(pattern, self.start);
84 Self {
85 view: self.view.clone(),
86 start: new_offset,
87 end: self.end,
88 }
89 }
90
91 pub fn trim_right(&self, pattern: &GraphemePattern<'a>) -> Self {
92 let new_offset = self.rfind(pattern, self.end);
93 Self {
94 view: self.view.clone(),
95 start: self.start,
96 end: new_offset,
97 }
98 }
99
100 pub fn splice(&self, skip_pattern: &GraphemePatternPair<'a>) -> Self {
101 let (left_pattern, right_pattern) = skip_pattern;
102 let mut new_view = GraphemeStringView::with_capacity(self.end - self.start);
103 let mut offset = self.start;
104 let l_pattern_s = left_pattern.len();
105
106 while offset < self.end {
107 let target_s = self.find(left_pattern, offset) + l_pattern_s;
108 for i in offset..target_s {
109 new_view.push(self.view[i]);
110 }
111 offset = self.find(right_pattern, target_s);
112 }
113
114 GraphemeStr::from_view(new_view)
115 }
116
117 pub fn find(&self, pattern: &GraphemePattern<'a>, offset: usize) -> usize {
118 let pattern_s = pattern.len();
119 let mut new_offset = offset;
120 let mut pattern_end = new_offset + pattern_s;
121
122 while new_offset < self.end && pattern_end < self.end {
123 if self.view[new_offset..pattern_end] == **pattern {
124 break;
125 }
126
127 new_offset += 1;
128 pattern_end = new_offset + pattern_s;
129 }
130
131 new_offset
132 }
133
134 pub fn rfind(&self, pattern: &GraphemePattern<'a>, offset: usize) -> usize {
135 let pattern_s = pattern.len();
136 let mut new_offset = offset;
137 while new_offset > self.start {
138 if self.view[new_offset - pattern_s..new_offset] == **pattern {
139 break;
140 }
141
142 new_offset -= 1;
143 }
144
145 new_offset
146 }
147
148 pub fn len(&self) -> usize {
149 self.end - self.start
150 }
151
152 pub fn get_graphemes(&self) -> GraphemeSlice<'_, 'a> {
153 &self.view[self.start..self.end]
154 }
155
156 pub fn truncate(&self, size: usize) -> Self {
157 let end = min(size, self.end);
158 Self {
159 view: self.view.clone(),
160 start: self.start,
161 end,
162 }
163 }
164
165 pub fn is_unique(&self) -> bool {
166 is_unique(&self.view)
167 }
168}
169
170impl ToString for GraphemeStr<'_> {
171 fn to_string(&self) -> String {
172 let mut new_string = String::with_capacity(self.len());
173
174 for grapheme in self.view[self.start..self.end].iter() {
175 new_string.push_str(grapheme);
176 }
177
178 new_string
179 }
180}
181
182impl RUMStringConversions for GraphemeStr<'_> {}
183
184pub trait StringLike {
187 fn with_capacity(capacity: usize) -> Self;
188 fn push_str(&mut self, string: &str);
189}
190
191pub trait AsString {
192 fn as_string(&self) -> RUMString;
193}
194
195pub trait AsStr {
196 fn as_str(&self) -> &str;
197 fn as_grapheme_str(&self) -> GraphemeStr {
198 GraphemeStr::from(self.as_str())
199 }
200}
201
202pub trait RUMStringConversions: ToString {
203 #[inline(always)]
204 fn to_raw(&self) -> RUMVec<u8> {
205 self.to_string().as_bytes().to_vec()
206 }
207
208 #[inline(always)]
209 fn to_buffer(&self) -> RUMBuffer {
210 string_to_buffer(self.to_string().as_str())
211 }
212}
213
214pub trait StringUtils: AsStr + RUMStringConversions {
215 #[inline(always)]
216 fn duplicate(&self, count: usize) -> RUMString {
217 let mut duplicated = RUMString::with_capacity(count);
218 for i in 0..count {
219 duplicated += &self.as_str();
220 }
221 duplicated
222 }
223
224 fn truncate(&self, count: usize) -> RUMString {
225 self.as_grapheme_str().truncate(count).to_string()
226 }
227}
228
229impl AsStr for String {
230 fn as_str(&self) -> &str {
231 self.as_str()
232 }
233}
234
235impl RUMStringConversions for RUMString {}
236impl StringUtils for RUMString {}
237
238impl RUMStringConversions for str {}
239
240impl AsStr for str {
241 fn as_str(&self) -> &str {
242 self
243 }
244}
245
246impl StringUtils for str {}
247
248impl RUMStringConversions for char {}
249
250pub trait RUMArrayConversions {
251 fn to_string(&self) -> RUMResult<RUMString>;
252}
253
254impl RUMArrayConversions for Vec<u8> {
255 #[inline(always)]
256 fn to_string(&self) -> RUMResult<RUMString> {
257 match RUMString::from_utf8(self.to_owned()) {
258 Ok(s) => Ok(s),
259 Err(e) => Err(rumtk_format!("Failure to parse incoming UTF-8 string: {}", e))
260 }
261 }
262}
263
264impl RUMArrayConversions for &[u8] {
265 #[inline(always)]
266 fn to_string(&self) -> RUMResult<RUMString> {
267 match RUMString::from_utf8(self.to_vec()) {
268 Ok(s) => Ok(s),
269 Err(e) => Err(rumtk_format!("Failure to parse incoming UTF-8 string: {}", e))
270 }
271 }
272}
273
274impl AsString for u8 {
275 fn as_string(&self) -> RUMString {
276 RUMString::from(char::from_u32((*self).into()).unwrap_or_default())
277 }
278}
279
/// Counts the entries of `vector` that differ from `string_token`.
pub fn count_tokens_ignoring_pattern(vector: &Vec<&str>, string_token: &RUMString) -> usize {
    vector
        .iter()
        .filter(|&&token| token != string_token.as_str())
        .count()
}
291
292pub fn try_decode(src: &[u8]) -> RUMResult<RUMString> {
299 let mut detector = EncodingDetector::new();
300 detector.feed(&src, true);
301 let encoding = detector.guess(None, true);
302 decode(src, encoding)
303}
304
305pub fn try_decode_with(src: &[u8], encoding_name: &str) -> RUMResult<RUMString> {
311 let encoding = match Encoding::for_label(encoding_name.as_bytes()) {
312 Some(v) => v,
313 None => return Ok(EMPTY_RUMSTRING.clone()),
314 };
315 decode(src, encoding)
316}
317
318fn decode(src: &[u8], encoding: &'static Encoding) -> RUMResult<RUMString> {
324 Ok(match encoding.decode_without_bom_handling_and_without_replacement(&src) {
325 Some(res) => RUMString::from(res),
326 None => src.to_string()?,
327 })
328}
329
330pub fn unescape_string(escaped_str: &str) -> RUMResult<RUMString> {
339 let graphemes = escaped_str.graphemes(true).collect::<Vec<&str>>();
340 let str_size = graphemes.len();
341 let mut result: Vec<u8> = Vec::with_capacity(escaped_str.len());
342 let mut i = 0;
343 while i < str_size {
344 let seq_start = graphemes[i];
345 match seq_start {
346 "\\" => {
347 let escape_seq = get_grapheme_string(&graphemes, " ", i);
348 let mut c = match unescape(&escape_seq) {
349 Ok(c) => c,
350 Err(_why) => Vec::from(escape_seq.as_bytes()),
351 };
352 result.append(&mut c);
353 i += &escape_seq.as_grapheme_str().len();
354 }
355 _ => {
356 result.append(&mut Vec::from(seq_start.as_bytes()));
357 i += 1;
358 }
359 }
360 }
361 Ok(try_decode(result.as_slice())?)
362}
363
364pub fn get_grapheme_string<'a>(
368 graphemes: &Vec<&'a str>,
369 end_grapheme: &str,
370 start_index: usize,
371) -> RUMString {
372 get_grapheme_collection(graphemes, end_grapheme, start_index).join("")
373}
374
/// Collects the graphemes from `start_index` up to (not including) the first grapheme
/// equal to `end_grapheme`, or up to the end of the list if there is none.
///
/// A `start_index` past the end yields an empty vector.
pub fn get_grapheme_collection<'a>(
    graphemes: &Vec<&'a str>,
    end_grapheme: &str,
    start_index: usize,
) -> Vec<&'a str> {
    graphemes
        .iter()
        .skip(start_index)
        .copied()
        .take_while(|grapheme| *grapheme != end_grapheme)
        .collect()
}
396
397pub fn unescape(escaped_str: &str) -> Result<Vec<u8>, RUMString> {
408 let lower_case = escaped_str.to_lowercase();
409 let mut bytes: Vec<u8> = Vec::with_capacity(3);
410 match &lower_case[0..2] {
411 "\\x" => {
413 let byte_str = number_to_char_unchecked(&hex_to_number(&lower_case[2..6])?);
414 bytes.append(&mut byte_str.as_bytes().to_vec());
415 }
416 "\\u" => {
418 let byte_str = number_to_char_unchecked(&hex_to_number(&lower_case[2..6])?);
419 bytes.append(&mut byte_str.as_bytes().to_vec());
420 }
421 "\\c" => {
423 let byte_str = number_to_char_unchecked(&hex_to_number(&lower_case[2..6])?);
424 bytes.append(&mut byte_str.as_bytes().to_vec());
425 }
426 "\\o" => {
428 let byte_str = number_to_char_unchecked(&octal_to_number(&lower_case[2..6])?);
429 bytes.append(&mut byte_str.as_bytes().to_vec());
430 }
431 "\\m" => match lower_case.as_grapheme_str().len() {
433 8 => {
434 bytes.push(hex_to_byte(&lower_case[2..4])?);
435 bytes.push(hex_to_byte(&lower_case[4..6])?);
436 bytes.push(hex_to_byte(&lower_case[6..8])?);
437 }
438 6 => {
439 bytes.push(hex_to_byte(&lower_case[2..4])?);
440 bytes.push(hex_to_byte(&lower_case[4..6])?);
441 }
442 _ => {
443 return Err(rumtk_format!(
444 "Unknown multibyte sequence. Cannot decode {}",
445 lower_case
446 ))
447 }
448 },
449 "\\z" => bytes.append(&mut lower_case.as_bytes().to_vec()),
451 _ => bytes.push(unescape_control_byte(&lower_case[0..2])?),
453 }
454 Ok(bytes)
455}
456
457fn unescape_control(escaped_str: &str) -> Result<char, RUMString> {
462 match escaped_str {
463 "\\t" => Ok('\t'),
465 "\\b" => Ok('\x08'),
466 "\\n" => Ok('\n'),
467 "\\r" => Ok('\r'),
468 "\\f" => Ok('\x14'),
469 "\\s" => Ok('\x20'),
470 "\\\\" => Ok(ASCII_ESCAPE_CHAR),
471 "\\'" => Ok('\''),
472 "\\\"" => Ok('"'),
473 "\\0" => Ok('\0'),
474 "\\v" => Ok('\x0B'),
475 "\\a" => Ok('\x07'),
476 _ => Err(rumtk_format!(
478 "Unknown escape sequence? Sequence: {}!",
479 escaped_str
480 )),
481 }
482}
483
484fn unescape_control_byte(escaped_str: &str) -> Result<u8, RUMString> {
489 match escaped_str {
490 "\\t" => Ok(9), "\\b" => Ok(8), "\\n" => Ok(10), "\\r" => Ok(13), "\\f" => Ok(12), "\\s" => Ok(32), "\\\\" => Ok(27), "\\'" => Ok(39), "\\\"" => Ok(34), "\\0" => Ok(0), "\\v" => Ok(11), "\\a" => Ok(7), _ => hex_to_byte(escaped_str),
506 }
507}
508
/// Parses `hex_str` as a hexadecimal `u32`.
///
/// # Errors
/// Returns a message containing the parse error and the offending input.
fn hex_to_number(hex_str: &str) -> Result<u32, RUMString> {
    u32::from_str_radix(hex_str, 16).map_err(|why| {
        rumtk_format!(
            "Failed to parse string with error {}! Input string {} is not hex string!",
            why,
            hex_str
        )
    })
}
523
/// Parses `hex_str` as a hexadecimal `u8`.
///
/// # Errors
/// Returns a message containing the parse error and the offending input.
fn hex_to_byte(hex_str: &str) -> Result<u8, RUMString> {
    u8::from_str_radix(hex_str, 16).map_err(|why| {
        rumtk_format!(
            "Failed to parse string with error {}! Input string {} is not hex string!",
            why,
            hex_str
        )
    })
}
538
/// Parses `hoctal_str` as an octal `u32`.
///
/// # Errors
/// Returns a message containing the parse error and the offending input.
fn octal_to_number(hoctal_str: &str) -> Result<u32, RUMString> {
    u32::from_str_radix(hoctal_str, 8).map_err(|why| {
        rumtk_format!(
            "Failed to parse string with error {}! Input string {} is not an octal string!",
            why,
            hoctal_str
        )
    })
}
553
/// Parses `hoctal_str` as an octal `u8`.
///
/// # Errors
/// Returns a message containing the parse error and the offending input.
fn octal_to_byte(hoctal_str: &str) -> Result<u8, RUMString> {
    u8::from_str_radix(hoctal_str, 8).map_err(|why| {
        rumtk_format!(
            "Failed to parse string with error {}! Input string {} is not an octal string!",
            why,
            hoctal_str
        )
    })
}
568
/// Converts a code point into a one-character string.
///
/// # Errors
/// Returns a message naming the number when it is not a valid Unicode scalar value.
fn number_to_char(num: &u32) -> Result<RUMString, RUMString> {
    char::from_u32(*num)
        .map(|character| character.to_string())
        .ok_or_else(|| rumtk_format!("Failed to cast number to character! Number {}", num))
}
581
/// Converts a code point into a one-character string without reporting failure.
///
/// Valid Unicode scalar values are converted exactly as before. A value that is not
/// a valid scalar (a surrogate, or anything above U+10FFFF) now yields U+FFFD
/// REPLACEMENT CHARACTER. The previous `char::from_u32_unchecked` call was undefined
/// behavior for such values even though this function is safe to call.
///
/// The name is kept so existing call sites continue to compile.
fn number_to_char_unchecked(num: &u32) -> RUMString {
    char::from_u32(*num)
        .unwrap_or(char::REPLACEMENT_CHARACTER)
        .to_string()
}
590
591pub fn escape(unescaped_str: &str) -> RUMString {
603 basic_escape(unescaped_str, &vec![("{", ""), ("}", "")])
604}
605
606pub fn basic_escape(unescaped_str: &str, except: EscapeExceptions) -> RUMString {
617 let escaped = is_escaped_str(unescaped_str);
618 if !escaped {
619 let mut escaped_str = unescaped_str.escape_default().to_string();
620 for (from, to) in except {
621 escaped_str = escaped_str.replace(from, to);
622 }
623 return escaped_str.to_string();
624 }
625 unescaped_str.to_string()
626}
627
/// Returns `true` when every byte of `unescaped_str` is ASCII (an empty string
/// qualifies).
pub fn is_ascii_str(unescaped_str: &str) -> bool {
    unescaped_str.bytes().all(|byte| byte.is_ascii())
}
637
638pub fn is_escaped_str(unescaped_str: &str) -> bool {
647 if !is_ascii_str(unescaped_str) {
648 return false;
649 }
650
651 for c in unescaped_str.chars() {
652 if !is_printable_char(&c) {
653 return false;
654 }
655 }
656 true
657}
658
659pub fn is_printable_char(c: &char) -> bool {
663 &MIN_ASCII_READABLE <= c && c <= &MAX_ASCII_READABLE
664}
665
/// Returns the characters of `unescaped_str` for which `closure` returns `true`, in
/// their original order.
pub fn filter_ascii(unescaped_str: &str, closure: fn(char) -> bool) -> RUMString {
    unescaped_str
        .chars()
        .filter(|&character| closure(character))
        .collect()
}
674
675pub fn filter_non_printable_ascii(unescaped_str: &str) -> RUMString {
679 filter_ascii(unescaped_str, |c: char| is_printable_char(&c))
680}
681
682pub fn string_to_buffer(data: &str) -> RUMBuffer {
699 RUMBuffer::copy_from_slice(data.as_bytes())
700}
701
702pub fn string_format(input: &str, formatting: &StringReplacementPair) -> RUMString {
718 let mut output = String::from(input);
719
720 for item in formatting.iter() {
721 output = output.as_str().replace(item.0, item.1);
722 }
723
724 output.to_string()
725}
726
727pub fn string_to_b64(data: &str) -> String {
736 BASE64_STANDARD.encode(data)
737}
738
739pub fn b64_to_string(data: &String) -> RUMResult<RUMVec<u8>> {
747 match BASE64_STANDARD.decode(data) {
748 Ok(result) => Ok(result),
749 Err(e) => Err(rumtk_format!("Failed to decode base64 string: {}", e)),
750 }
751}