1use crate::core::{is_unique, RUMResult, RUMVec};
21use crate::types::RUMBuffer;
22use chardetng::EncodingDetector;
23pub use compact_str::{
24 format_compact as rumtk_format, CompactString, CompactStringExt, ToCompactString,
25};
26use encoding_rs::Encoding;
27use std::cmp::min;
28use unicode_segmentation::UnicodeSegmentation;
/// NOTE(review): not referenced in the visible part of this file; presumably the size of
/// the window inspected for one escaped sequence — confirm against its users.
29const ESCAPED_STRING_WINDOW: usize = 6;
/// The backslash character; `unescape_control` returns it for the `\\` sequence.
31const ASCII_ESCAPE_CHAR: char = '\\';
/// Lowest character accepted by `is_printable_char` (space).
32const MIN_ASCII_READABLE: char = ' ';
/// Highest character accepted by `is_printable_char` (tilde).
33const MAX_ASCII_READABLE: char = '~';
/// The empty string slice.
34pub const EMPTY_STRING: &str = "";
/// A string slice holding a single dot.
35pub const DOT_STR: &str = ".";
/// `Some` wrapping the empty string slice.
36pub const EMPTY_STRING_OPTION: Option<&str> = Some("");
/// Every character from space through tilde, in ascending order.
37pub const READABLE_ASCII: &str = " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~";
38
/// String type used throughout this module; an alias for `CompactString`.
39pub type RUMString = CompactString;
/// A `(from, to)` pair: `basic_escape` replaces every `from` with `to` after escaping.
41pub type EscapeException<'a> = (&'a str, &'a str);
/// Borrowed list of `EscapeException` pairs.
42pub type EscapeExceptions<'a> = &'a [EscapeException<'a>];
/// A string slice; `GraphemeStr::from` fills its view with one slice per grapheme cluster.
43pub type Grapheme<'a> = &'a str;
/// Owned sequence of graphemes borrowed from a source string.
44pub type GraphemeStringView<'a> = RUMVec<Grapheme<'a>>;
/// Borrowed sequence of graphemes to search for.
45pub type GraphemePattern<'a> = &'a [Grapheme<'a>];
/// Borrowed slice of graphemes; `'b` is the borrow of the slice, `'a` the source string.
46pub type GraphemeSlice<'b, 'a> = &'b [Grapheme<'a>];
/// A `(left, right)` pair of patterns, as taken by `GraphemeStr::trim` and `GraphemeStr::splice`.
47pub type GraphemePatternPair<'a> = (GraphemePattern<'a>, GraphemePattern<'a>);
48
49#[derive(Default, Debug, PartialEq, Clone)]
54pub struct GraphemeStr<'a> {
55 view: GraphemeStringView<'a>,
56 start: usize,
57 end: usize,
58}
59
60impl<'a> GraphemeStr<'a> {
61 pub fn from(string: &'a str) -> Self {
62 let view = string.graphemes(true).collect::<GraphemeStringView>();
63 Self::from_view(view)
64 }
65
66 pub fn from_view(view: GraphemeStringView<'a>) -> Self {
67 let start = 0;
68 let end = view.len();
69 Self { view, start, end }
70 }
71
72 pub fn at(&self, index: usize) -> Grapheme<'a> {
73 self.view[index]
74 }
75
76 pub fn trim(&self, pattern: &GraphemePatternPair<'a>) -> Self {
77 let (left_pattern, right_pattern) = pattern;
78 self.trim_left(left_pattern).trim_right(right_pattern)
79 }
80
81 pub fn trim_left(&self, pattern: &GraphemePattern<'a>) -> Self {
82 let new_offset = self.find(pattern, self.start);
83 Self {
84 view: self.view.clone(),
85 start: new_offset,
86 end: self.end,
87 }
88 }
89
90 pub fn trim_right(&self, pattern: &GraphemePattern<'a>) -> Self {
91 let new_offset = self.rfind(pattern, self.end);
92 Self {
93 view: self.view.clone(),
94 start: self.start,
95 end: new_offset,
96 }
97 }
98
99 pub fn splice(&self, skip_pattern: &GraphemePatternPair<'a>) -> Self {
100 let (left_pattern, right_pattern) = skip_pattern;
101 let mut new_view = GraphemeStringView::with_capacity(self.end - self.start);
102 let mut offset = self.start;
103 let l_pattern_s = left_pattern.len();
104
105 while offset < self.end {
106 let target_s = self.find(left_pattern, offset) + l_pattern_s;
107 for i in offset..target_s {
108 new_view.push(self.view[i]);
109 }
110 offset = self.find(right_pattern, target_s);
111 }
112
113 GraphemeStr::from_view(new_view)
114 }
115
116 pub fn find(&self, pattern: &GraphemePattern<'a>, offset: usize) -> usize {
117 let pattern_s = pattern.len();
118 let mut new_offset = offset;
119 let mut pattern_end = new_offset + pattern_s;
120
121 while new_offset < self.end && pattern_end < self.end {
122 if self.view[new_offset..pattern_end] == **pattern {
123 break;
124 }
125
126 new_offset += 1;
127 pattern_end = new_offset + pattern_s;
128 }
129
130 new_offset
131 }
132
133 pub fn rfind(&self, pattern: &GraphemePattern<'a>, offset: usize) -> usize {
134 let pattern_s = pattern.len();
135 let mut new_offset = offset;
136 while new_offset > self.start {
137 if self.view[new_offset - pattern_s..new_offset] == **pattern {
138 break;
139 }
140
141 new_offset -= 1;
142 }
143
144 new_offset
145 }
146
147 pub fn len(&self) -> usize {
148 self.end - self.start
149 }
150
151 pub fn get_graphemes(&self) -> GraphemeSlice<'_, 'a> {
152 &self.view[self.start..self.end]
153 }
154
155 pub fn truncate(&self, size: usize) -> Self {
156 let end = min(size, self.end);
157 Self {
158 view: self.view.clone(),
159 start: self.start,
160 end,
161 }
162 }
163
164 pub fn is_unique(&self) -> bool {
165 is_unique(&self.view)
166 }
167}
168
169impl ToString for GraphemeStr<'_> {
170 fn to_string(&self) -> String {
171 let mut new_string = String::with_capacity(self.len());
172
173 for grapheme in self.view[self.start..self.end].iter() {
174 new_string.push_str(grapheme);
175 }
176
177 new_string
178 }
179}
180
181impl RUMStringConversions for GraphemeStr<'_> {}
182
/// Interface for string-like types that can be created with a capacity and appended to.
/// NOTE(review): no implementation is visible in this part of the file.
183pub trait StringLike {
    /// Creates a new value from a `capacity` argument.
    /// NOTE(review): presumably a pre-allocation hint — confirm with the implementors.
186 fn with_capacity(capacity: usize) -> Self;
    /// Takes a string slice while mutating `self`; presumably appends it — confirm.
187 fn push_str(&mut self, string: &str);
188}
189
190pub trait AsStr {
191 fn as_str(&self) -> &str;
192 fn as_grapheme_str(&self) -> GraphemeStr {
193 GraphemeStr::from(self.as_str())
194 }
195}
196
197pub trait RUMStringConversions: ToString {
198 #[inline(always)]
199 fn to_rumstring(&self) -> RUMString {
200 RUMString::from(self.to_string())
201 }
202
203 #[inline(always)]
204 fn to_raw(&self) -> RUMVec<u8> {
205 self.to_string().as_bytes().to_vec()
206 }
207
208 #[inline(always)]
209 fn to_buffer(&self) -> RUMBuffer {
210 RUMBuffer::from(self.to_string())
211 }
212}
213
214pub trait StringUtils: AsStr + RUMStringConversions {
215 #[inline(always)]
216 fn duplicate(&self, count: usize) -> RUMString {
217 let mut duplicated = RUMString::with_capacity(count);
218 for i in 0..count {
219 duplicated += &self.as_str();
220 }
221 duplicated
222 }
223
224 fn truncate(&self, count: usize) -> RUMString {
225 self.as_grapheme_str().truncate(count).to_rumstring()
226 }
227}
228
229impl AsStr for String {
230 fn as_str(&self) -> &str {
231 self.as_str()
232 }
233}
234
235impl RUMStringConversions for RUMString {}
236impl AsStr for RUMString {
237 fn as_str(&self) -> &str {
238 self.as_str()
239 }
240}
241impl StringUtils for RUMString {}
242
243impl RUMStringConversions for str {}
244
245impl AsStr for str {
246 fn as_str(&self) -> &str {
247 self
248 }
249}
250
251impl StringUtils for str {}
252
253impl RUMStringConversions for char {}
254
/// Conversion of byte containers into a `RUMString`.
/// The implementations visible in this file cover `Vec<u8>` and `&[u8]`.
255pub trait RUMArrayConversions {
    /// Builds a `RUMString` from the bytes of `self`.
256 fn to_rumstring(&self) -> RUMString;
257}
258
259impl RUMArrayConversions for Vec<u8> {
260 fn to_rumstring(&self) -> RUMString {
261 self.as_slice().to_rumstring()
262 }
263}
264
265impl RUMArrayConversions for &[u8] {
266 fn to_rumstring(&self) -> RUMString {
267 RUMString::from_utf8(&self).unwrap()
268 }
269}
270
271pub fn count_tokens_ignoring_pattern(vector: &Vec<&str>, string_token: &RUMString) -> usize {
274 let mut count: usize = 0;
275 for tok in vector.iter() {
276 if string_token != tok {
277 count += 1;
278 }
279 }
280 count
281}
282
283pub fn try_decode(src: &[u8]) -> RUMString {
290 let mut detector = EncodingDetector::new();
291 detector.feed(&src, true);
292 let encoding = detector.guess(None, true);
293 decode(src, encoding)
294}
295
296pub fn try_decode_with(src: &[u8], encoding_name: &str) -> RUMString {
302 let encoding = match Encoding::for_label(encoding_name.as_bytes()) {
303 Some(v) => v,
304 None => return RUMString::from(""),
305 };
306 decode(src, encoding)
307}
308
309fn decode(src: &[u8], encoding: &'static Encoding) -> RUMString {
315 match encoding.decode_without_bom_handling_and_without_replacement(&src) {
316 Some(res) => RUMString::from(res),
317 None => RUMString::from_utf8(src).unwrap(),
318 }
319}
320
321pub fn unescape_string(escaped_str: &str) -> RUMResult<RUMString> {
330 let graphemes = escaped_str.graphemes(true).collect::<Vec<&str>>();
331 let str_size = graphemes.len();
332 let mut result: Vec<u8> = Vec::with_capacity(escaped_str.len());
333 let mut i = 0;
334 while i < str_size {
335 let seq_start = graphemes[i];
336 match seq_start {
337 "\\" => {
338 let escape_seq = get_grapheme_string(&graphemes, " ", i);
339 let mut c = match unescape(&escape_seq) {
340 Ok(c) => c,
341 Err(_why) => Vec::from(escape_seq.as_bytes()),
342 };
343 result.append(&mut c);
344 i += &escape_seq.as_grapheme_str().len();
345 }
346 _ => {
347 result.append(&mut Vec::from(seq_start.as_bytes()));
348 i += 1;
349 }
350 }
351 }
352 Ok(try_decode(result.as_slice()))
353}
354
355pub fn get_grapheme_string<'a>(
359 graphemes: &Vec<&'a str>,
360 end_grapheme: &str,
361 start_index: usize,
362) -> RUMString {
363 get_grapheme_collection(graphemes, end_grapheme, start_index).join_compact("")
364}
365
/// Collects the graphemes from `start_index` up to (not including) the first occurrence
/// of `end_grapheme`.
///
/// Returns an empty vector when `start_index` is past the end of `graphemes`.
pub fn get_grapheme_collection<'a>(
    graphemes: &Vec<&'a str>,
    end_grapheme: &str,
    start_index: usize,
) -> Vec<&'a str> {
    graphemes
        .iter()
        .skip(start_index)
        .take_while(|grapheme| **grapheme != end_grapheme)
        .copied()
        .collect()
}
387
388pub fn unescape(escaped_str: &str) -> Result<Vec<u8>, RUMString> {
399 let lower_case = escaped_str.to_lowercase();
400 let mut bytes: Vec<u8> = Vec::with_capacity(3);
401 match &lower_case[0..2] {
402 "\\x" => {
404 let byte_str = number_to_char_unchecked(&hex_to_number(&lower_case[2..6])?);
405 bytes.append(&mut byte_str.as_bytes().to_vec());
406 }
407 "\\u" => {
409 let byte_str = number_to_char_unchecked(&hex_to_number(&lower_case[2..6])?);
410 bytes.append(&mut byte_str.as_bytes().to_vec());
411 }
412 "\\c" => {
414 let byte_str = number_to_char_unchecked(&hex_to_number(&lower_case[2..6])?);
415 bytes.append(&mut byte_str.as_bytes().to_vec());
416 }
417 "\\o" => {
419 let byte_str = number_to_char_unchecked(&octal_to_number(&lower_case[2..6])?);
420 bytes.append(&mut byte_str.as_bytes().to_vec());
421 }
422 "\\m" => match lower_case.as_grapheme_str().len() {
424 8 => {
425 bytes.push(hex_to_byte(&lower_case[2..4])?);
426 bytes.push(hex_to_byte(&lower_case[4..6])?);
427 bytes.push(hex_to_byte(&lower_case[6..8])?);
428 }
429 6 => {
430 bytes.push(hex_to_byte(&lower_case[2..4])?);
431 bytes.push(hex_to_byte(&lower_case[4..6])?);
432 }
433 _ => {
434 return Err(rumtk_format!(
435 "Unknown multibyte sequence. Cannot decode {}",
436 lower_case
437 ))
438 }
439 },
440 "\\z" => bytes.append(&mut lower_case.as_bytes().to_vec()),
442 _ => bytes.push(unescape_control_byte(&lower_case[0..2])?),
444 }
445 Ok(bytes)
446}
447
448fn unescape_control(escaped_str: &str) -> Result<char, RUMString> {
453 match escaped_str {
454 "\\t" => Ok('\t'),
456 "\\b" => Ok('\x08'),
457 "\\n" => Ok('\n'),
458 "\\r" => Ok('\r'),
459 "\\f" => Ok('\x14'),
460 "\\s" => Ok('\x20'),
461 "\\\\" => Ok(ASCII_ESCAPE_CHAR),
462 "\\'" => Ok('\''),
463 "\\\"" => Ok('"'),
464 "\\0" => Ok('\0'),
465 "\\v" => Ok('\x0B'),
466 "\\a" => Ok('\x07'),
467 _ => Err(rumtk_format!(
469 "Unknown escape sequence? Sequence: {}!",
470 escaped_str
471 )),
472 }
473}
474
475fn unescape_control_byte(escaped_str: &str) -> Result<u8, RUMString> {
480 match escaped_str {
481 "\\t" => Ok(9), "\\b" => Ok(8), "\\n" => Ok(10), "\\r" => Ok(13), "\\f" => Ok(12), "\\s" => Ok(32), "\\\\" => Ok(27), "\\'" => Ok(39), "\\\"" => Ok(34), "\\0" => Ok(0), "\\v" => Ok(11), "\\a" => Ok(7), _ => hex_to_byte(escaped_str),
497 }
498}
499
500fn hex_to_number(hex_str: &str) -> Result<u32, RUMString> {
504 match u32::from_str_radix(&hex_str, 16) {
505 Ok(result) => Ok(result),
506 Err(val) => Err(rumtk_format!(
507 "Failed to parse string with error {}! Input string {} \
508 is not hex string!",
509 val,
510 hex_str
511 )),
512 }
513}
514
515fn hex_to_byte(hex_str: &str) -> Result<u8, RUMString> {
519 match u8::from_str_radix(&hex_str, 16) {
520 Ok(result) => Ok(result),
521 Err(val) => Err(rumtk_format!(
522 "Failed to parse string with error {}! Input string {} \
523 is not hex string!",
524 val,
525 hex_str
526 )),
527 }
528}
529
530fn octal_to_number(hoctal_str: &str) -> Result<u32, RUMString> {
534 match u32::from_str_radix(&hoctal_str, 8) {
535 Ok(result) => Ok(result),
536 Err(val) => Err(rumtk_format!(
537 "Failed to parse string with error {}! Input string {} \
538 is not an octal string!",
539 val,
540 hoctal_str
541 )),
542 }
543}
544
545fn octal_to_byte(hoctal_str: &str) -> Result<u8, RUMString> {
549 match u8::from_str_radix(&hoctal_str, 8) {
550 Ok(result) => Ok(result),
551 Err(val) => Err(rumtk_format!(
552 "Failed to parse string with error {}! Input string {} \
553 is not an octal string!",
554 val,
555 hoctal_str
556 )),
557 }
558}
559
560fn number_to_char(num: &u32) -> Result<RUMString, RUMString> {
564 match char::from_u32(*num) {
565 Some(result) => Ok(result.to_rumstring()),
566 None => Err(rumtk_format!(
567 "Failed to cast number to character! Number {}",
568 num
569 )),
570 }
571}
572
573fn number_to_char_unchecked(num: &u32) -> RUMString {
579 unsafe { char::from_u32_unchecked(*num).to_rumstring() }
580}
581
582pub fn escape(unescaped_str: &str) -> RUMString {
594 basic_escape(unescaped_str, &vec![("{", ""), ("}", "")])
595}
596
597pub fn basic_escape(unescaped_str: &str, except: EscapeExceptions) -> RUMString {
608 let escaped = is_escaped_str(unescaped_str);
609 if !escaped {
610 let mut escaped_str = unescaped_str.escape_default().to_string();
611 for (from, to) in except {
612 escaped_str = escaped_str.replace(from, to);
613 }
614 return escaped_str.to_rumstring();
615 }
616 unescaped_str.to_rumstring()
617}
618
/// Returns `true` when every byte of `unescaped_str` is ASCII (vacuously true for "").
pub fn is_ascii_str(unescaped_str: &str) -> bool {
    unescaped_str.bytes().all(|byte| byte.is_ascii())
}
628
629pub fn is_escaped_str(unescaped_str: &str) -> bool {
638 if !is_ascii_str(unescaped_str) {
639 return false;
640 }
641
642 for c in unescaped_str.chars() {
643 if !is_printable_char(&c) {
644 return false;
645 }
646 }
647 true
648}
649
650pub fn is_printable_char(c: &char) -> bool {
654 &MIN_ASCII_READABLE <= c && c <= &MAX_ASCII_READABLE
655}
656
657pub fn filter_ascii(unescaped_str: &str, closure: fn(char) -> bool) -> RUMString {
661 let mut filtered = unescaped_str.to_rumstring();
662 filtered.retain(closure);
663 filtered
664}
665
666pub fn filter_non_printable_ascii(unescaped_str: &str) -> RUMString {
670 filter_ascii(unescaped_str, |c: char| is_printable_char(&c))
671}