polyvalue/types/
string.rs

1//! String type
2//!
3//! This type is a wrapper around `String`
4//!
5//! Like all subtypes, it is hashable, serializable, and fully comparable
6//! It is represented as a string in the form of `<value>`
7//!
8use crate::{operations::*, types::*, Error, InnerValue, Value, ValueTrait, ValueType};
9use serde::{Deserialize, Serialize};
10use std::ops::{Range, RangeInclusive};
11
/// Subtype of `Value` that represents a string
///
/// Newtype over an owned `String`. Equality, ordering, hashing, cloning,
/// (de)serialization and `Default` are all derived from the wrapped string
#[derive(PartialEq, Eq, PartialOrd, Ord, Clone, Hash, Serialize, Deserialize, Default)]
pub struct Str(String);
// Hooks `Str` up through `impl_value!`, which is defined elsewhere in the crate.
// NOTE(review): the two closures look like (1) how to produce an owned copy of the
// inner `String` and (2) how the value is formatted - confirm against the macro.
impl_value!(
    Str,
    String,
    |v: &Self| v.inner().clone(),
    // Formatting goes through `to_escaped_string`, so the output is quoted and escaped
    |v: &Self, f: &mut std::fmt::Formatter<'_>| { write!(f, "{}", v.to_escaped_string()) }
);
21
22impl From<&str> for Str {
23    fn from(value: &str) -> Self {
24        <Str>::new(value.into())
25    }
26}
27
28impl From<&str> for Value {
29    fn from(value: &str) -> Self {
30        <Str>::new(value.into()).into()
31    }
32}
33
34impl Str {
35    /// Maps a range of values to a range of bytes in a string coresponding to the same characters
36    /// This is necessary because the string is UTF-8 encoded
37    /// Can fail if the range is out of bounds, or if the range is not a valid integer range
38    fn map_range_to_bytes(&self, range: RangeInclusive<&Value>) -> Result<Range<usize>, Error> {
39        let mut range = *I64::try_from((*range.start()).clone())?.inner()
40            ..*I64::try_from((*range.end()).clone())?.inner();
41
42        let chars = self.inner().chars().count() as i64;
43        if range.start < 0 {
44            range.start += chars;
45        }
46        if range.end < 0 {
47            range.end += chars;
48        }
49        let mut range = range.start as usize..range.end as usize;
50
51        // Get the byte-index of the nth character of self.inner()
52        // This is necessary because the string is UTF-8 encoded
53        // and we need to get the nth character, not the nth byte
54        let mut byte_index = 0;
55        for _ in 0..range.start {
56            byte_index += self
57                .inner()
58                .get(byte_index..)
59                .ok_or(Error::Index {
60                    key: range.start.to_string(),
61                })?
62                .chars()
63                .next()
64                .ok_or(Error::Index {
65                    key: range.start.to_string(),
66                })?
67                .len_utf8();
68        }
69        range.start = byte_index;
70
71        // and the start of the next
72        let mut byte_index = 0;
73        for _ in 0..range.end + 1 {
74            byte_index += self
75                .inner()
76                .get(byte_index..)
77                .ok_or(Error::Index {
78                    key: range.end.to_string(),
79                })?
80                .chars()
81                .next()
82                .ok_or(Error::Index {
83                    key: range.end.to_string(),
84                })?
85                .len_utf8();
86        }
87        range.end = byte_index - 1;
88
89        Ok(range)
90    }
91
92    /// String indexing
93    /// Returns a substring
94    ///
95    /// Although this looks like the IndexingOperationExt trait, it is not
96    /// because it returns a string instead of a value
97    pub fn substr(&self, index: RangeInclusive<&Value>) -> Result<&str, crate::Error> {
98        let range = self.map_range_to_bytes(index)?;
99        self.inner()
100            .get(range.start..=range.end)
101            .ok_or(Error::Index {
102                key: format!("{}..{}", range.start, range.end),
103            })
104    }
105
106    /// Mutable string indexing
107    /// Returns a mutable substring
108    ///
109    /// Although this looks like the IndexingOperationExt trait, it is not
110    /// because it returns a string instead of a value
111    pub fn mut_substr(&mut self, index: RangeInclusive<&Value>) -> Result<&mut str, crate::Error> {
112        let range = self.map_range_to_bytes(index)?;
113        self.inner_mut()
114            .get_mut(range.start..=range.end)
115            .ok_or(Error::Index {
116                key: format!("{}..{}", range.start, range.end),
117            })
118    }
119
120    /// Replace a set of characters in the string
121    ///
122    /// Although this looks like the IndexingOperationExt trait, it is not
123    /// because it returns a string instead of a value
124    pub fn set_substr(
125        &mut self,
126        index: RangeInclusive<&Value>,
127        value: Value,
128    ) -> Result<(), crate::Error> {
129        let range = *I64::try_from((*index.start()).clone())?.inner() as usize
130            ..*I64::try_from((*index.end()).clone())?.inner() as usize;
131
132        let value = Str::try_from(value)?.inner().clone();
133
134        let prefix = if range.start == 0 {
135            "".to_string()
136        } else {
137            self.substr(&0.into()..=&(range.start - 1).into())?
138                .to_string()
139        };
140
141        let char_count = self.inner_mut().chars().count();
142        let suffix = if range.end == char_count - 1 {
143            ""
144        } else {
145            self.substr(&(range.end + 1).into()..=&(char_count - 1).into())?
146        };
147
148        *self.inner_mut() = format!("{}{}{}", prefix, value, suffix);
149        Ok(())
150    }
151
152    /// Convert an index value to a range, useful for bridging the gap between
153    /// the IndexingOperationExt trait and the substr functions
154    /// Can fail if the index is not an array of integers, or if the array is empty
155    pub fn index_value_to_range(index: &Value) -> Result<std::ops::RangeInclusive<Value>, Error> {
156        // Convert index to a range - we will need an array of integers
157        let index = index.clone().as_a::<Array>()?;
158        let indices = index
159            .inner()
160            .iter()
161            .map(|v| Ok::<<I64 as ValueTrait>::Inner, Error>(*v.clone().as_a::<I64>()?.inner()))
162            .collect::<Result<Vec<_>, _>>()?;
163        if indices.is_empty() {
164            Err(Error::Index {
165                key: index.to_string(),
166            })?;
167        }
168
169        let start = Value::from(*indices.iter().min().unwrap());
170        let end = Value::from(*indices.iter().max().unwrap());
171        Ok(start..=end)
172    }
173
174    /// Returns a quoted string, with \, \n, \r, \t and " escaped
175    pub fn to_escaped_string(&self) -> String {
176        format!(
177            "\"{}\"",
178            self.inner()
179                .replace("\\", "\\\\")
180                .replace("\n", "\\n")
181                .replace("\r", "\\r")
182                .replace("\t", "\\t")
183                .replace("\"", "\\\"")
184        )
185    }
186}
187
// Conversions between `Str` and `Value`, generated by `map_value!`.
// - handle_into wraps the `Str` with `Value::string`
// - handle_from returns a clone of the inner `Str` for string values; every other
//   value is converted through its `to_string()` output, so this branch never fails
map_value!(
    from = Str,
    handle_into = (v) { Value::string(v) },
    handle_from = (v) {
        match v.inner() {
            InnerValue::String(v) => Ok(v.clone()),
            _ => Ok(Str::from(v.to_string())),
        }
    }
);
198
// One `map_type!` invocation per other subtype, each paired with `Str`.
// NOTE(review): presumably these generate the conversions from each listed type
// into `Str` - confirm against the macro definition
map_type!(Bool, Str);
map_type!(Int, Str);
map_type!(Float, Str);
map_type!(Fixed, Str);
map_type!(Currency, Str);
map_type!(Array, Str);
map_type!(Object, Str);
206
// Operator overloads for `Str`, generated by `overload_operator!`.
// NOTE(review): presumably add/sub/neg forward to the ArithmeticOperationExt impl
// below, and deref exposes the inner `String` - confirm against the macro definition
overload_operator!(Str, add);
overload_operator!(Str, sub);
overload_operator!(Str, neg);
overload_operator!(Str, deref);
211
212impl MatchingOperationExt for Str {
213    fn matching_op(
214        container: &Self,
215        pattern: &Value,
216        operation: MatchingOperation,
217    ) -> Result<Value, crate::Error>
218    where
219        Self: Sized,
220    {
221        let pattern = Str::try_from(pattern.clone())?;
222        let result = match operation {
223            MatchingOperation::Contains => {
224                let pattern = pattern.inner().as_str();
225                let pattern = convert_regex_string(pattern, |s: String| s)?;
226                pattern.is_match(container.inner().as_str())
227            }
228            MatchingOperation::StartsWith => {
229                container.inner().starts_with(pattern.inner().as_str())
230            }
231            MatchingOperation::EndsWith => container.inner().ends_with(pattern.inner().as_str()),
232            MatchingOperation::Matches => {
233                let pattern = pattern.inner().as_str();
234                let pattern = convert_regex_string(pattern, |mut s: String| {
235                    if !s.starts_with('^') {
236                        s = "^".to_string() + s.as_str();
237                    }
238                    if !s.ends_with('$') {
239                        s += "$"
240                    }
241                    s
242                })?;
243                pattern.is_match(container.inner().as_str())
244            }
245
246            // Handled by Value
247            _ => false,
248        };
249
250        Ok(result.into())
251    }
252}
253
254impl ArithmeticOperationExt for Str {
255    fn arithmetic_op(
256        self,
257        right: Self,
258        operation: ArithmeticOperation,
259    ) -> Result<Self, crate::Error> {
260        let left = self.into_inner();
261        let right = right.into_inner();
262
263        let result = match operation {
264            ArithmeticOperation::Add => left + right.as_str(),
265            ArithmeticOperation::Subtract => left.replace(&right, ""),
266
267            _ => Err(Error::UnsupportedOperation {
268                operation: operation.to_string(),
269                actual_type: ValueType::String,
270            })?,
271        };
272        Ok(result.into())
273    }
274
275    fn arithmetic_neg(self) -> Result<Self, crate::Error>
276    where
277        Self: Sized,
278    {
279        let s: String = self.chars().rev().collect();
280        Ok(s.into())
281    }
282}
283
284impl BooleanOperationExt for Str {
285    fn boolean_op(self, right: Self, operation: BooleanOperation) -> Result<Value, Error> {
286        let left = self.into_inner();
287        let right = right.into_inner();
288
289        let result = match operation {
290            BooleanOperation::And => !left.is_empty() && !right.is_empty(),
291            BooleanOperation::Or => !left.is_empty() || !right.is_empty(),
292
293            BooleanOperation::LT => left < right,
294            BooleanOperation::GT => left > right,
295            BooleanOperation::LTE => left <= right,
296            BooleanOperation::GTE => left >= right,
297            BooleanOperation::EQ | BooleanOperation::StrictEQ => left == right,
298            BooleanOperation::NEQ | BooleanOperation::StrictNEQ => left != right,
299        };
300
301        Ok(result.into())
302    }
303
304    fn boolean_not(self) -> Result<Value, crate::Error>
305    where
306        Self: Sized,
307    {
308        Ok(self.into_inner().is_empty().into())
309    }
310}
311
312impl IndexingOperationExt for Str {
313    fn get_index(&self, index: &Value) -> Result<Value, crate::Error> {
314        self.substr(index..=index).map(Value::from)
315    }
316
317    fn get_indices(&self, index: &Value) -> Result<Value, crate::Error> {
318        if index.is_a(ValueType::Range) {
319            let indices = index.clone().as_a::<crate::types::Range>()?.inner().clone();
320            let lower = Value::from(*indices.start());
321            let upper = Value::from(*indices.end());
322            self.substr(&lower..=&upper).map(Value::from)
323        } else {
324            let indices = index.clone().as_a::<Array>()?;
325            if indices.inner().is_empty() {
326                return Ok(Value::from(""));
327            }
328
329            let results = indices
330                .inner()
331                .iter()
332                .map(|i| {
333                    Ok(self
334                        .get_index(i)?
335                        .clone()
336                        .as_a::<Str>()?
337                        .inner()
338                        .to_string())
339                })
340                .collect::<Result<Vec<_>, Error>>()?;
341            // join into one Str
342            Ok(Value::from(results.join("")))
343        }
344    }
345}
346
347// This function will convert a string of either forms `/pattern/flags` or `pattern` to a regex object
348fn convert_regex_string<F>(input: &str, formatting_callback: F) -> Result<regex::Regex, Error>
349where
350    F: Fn(String) -> String,
351{
352    const FLAG_INREGEX: usize = 0b0001;
353    const FLAG_INFLAGS: usize = 0b0010;
354    const FLAG_ESCAPE: usize = 0b0100;
355
356    let mut pattern = String::new();
357    let mut flags = Vec::new();
358    let mut state = 0;
359    for char in input.chars() {
360        if state & FLAG_ESCAPE != 0 {
361            state &= !FLAG_ESCAPE;
362            pattern.push(char);
363            continue;
364        }
365
366        match char {
367            '/' if state & FLAG_INREGEX == 0 => {
368                state |= FLAG_INREGEX;
369            }
370            '/' => {
371                state &= !FLAG_INREGEX;
372                state |= FLAG_INFLAGS;
373            }
374            '\\' => {
375                state |= FLAG_ESCAPE;
376                pattern.push(char);
377            }
378            _ if state & FLAG_INFLAGS != 0 => {
379                flags.push(char);
380            }
381            _ => {
382                pattern.push(char);
383            }
384        }
385    }
386
387    // Catch the case where the string starts with a / but doesn't end with one
388    if state & FLAG_INREGEX != 0 {
389        pattern = input.to_string();
390    }
391
392    pattern = formatting_callback(pattern);
393
394    let mut regex = regex::RegexBuilder::new(&pattern);
395    for flag in flags {
396        match flag {
397            'i' => regex.case_insensitive(true),
398            'm' => regex.multi_line(true),
399            's' => regex.dot_matches_new_line(true),
400            'U' => regex.swap_greed(true),
401            'u' => regex.unicode(true),
402            'x' => regex.ignore_whitespace(true),
403            _ => {
404                return Err(Error::InvalidRegexFlag(flag.to_string()));
405            }
406        };
407    }
408
409    Ok(regex.build()?)
410}
411
412//
413// Tests
414//
415
416#[cfg(test)]
417mod test {
418    use super::*;
419
420    #[test]
421    fn test_decode_regex() {
422        let result = convert_regex_string("/pattern/", |s: String| s).unwrap();
423        assert_eq!(result.as_str(), "pattern");
424
425        let result = convert_regex_string("/pattern/i", |s: String| s).unwrap();
426        assert_eq!(result.as_str(), "pattern");
427        assert!(result.is_match("PATTERN"));
428
429        let result = convert_regex_string("pattern", |s: String| s).unwrap();
430        assert_eq!(result.as_str(), "pattern");
431
432        let result = convert_regex_string("/patt\\[ern", |s: String| s).unwrap();
433        assert_eq!(result.as_str(), "/patt\\[ern");
434
435        let result = convert_regex_string("/؋/", |s: String| s).unwrap();
436        assert_eq!(result.as_str(), "؋");
437
438        convert_regex_string("/\\؋/i", |s: String| s).unwrap_err();
439        let result = convert_regex_string("/؋/i", |s: String| s).unwrap();
440        assert_eq!(result.as_str(), "؋");
441        assert!(result.is_match("؋"));
442    }
443
444    #[test]
445    fn test_to_escaped_string() {
446        let s = Str::from("Hello, world!");
447        assert_eq!(s.to_escaped_string(), "\"Hello, world!\"");
448
449        let s = Str::from("Hello, \nworld!");
450        assert_eq!(s.to_escaped_string(), "\"Hello, \\nworld!\"");
451
452        let s = Str::from("Hello, \rworld!");
453        assert_eq!(s.to_escaped_string(), "\"Hello, \\rworld!\"");
454
455        let s = Str::from("Hello, \tworld!");
456        assert_eq!(s.to_escaped_string(), "\"Hello, \\tworld!\"");
457
458        let s = Str::from("Hello, \"world!\"");
459        assert_eq!(s.to_escaped_string(), "\"Hello, \\\"world!\\\"\"");
460    }
461
462    #[test]
463    fn test_matching() {
464        let result = Str::matching_op(
465            &Str::from("Hello, world!"),
466            &Str::from("[a-z]").into(),
467            MatchingOperation::Contains,
468        )
469        .unwrap();
470        assert_eq!(result, Bool::from(true).into());
471
472        let result = Str::matching_op(
473            &Str::from("Hello, world!"),
474            &Str::from("world").into(),
475            MatchingOperation::StartsWith,
476        )
477        .unwrap();
478        assert_eq!(result, Bool::from(false).into());
479
480        let result = Str::matching_op(
481            &Str::from("Hello, world!"),
482            &Str::from("world!").into(),
483            MatchingOperation::EndsWith,
484        )
485        .unwrap();
486        assert_eq!(result, Bool::from(true).into());
487
488        let result = Str::matching_op(
489            &Str::from("Hello, world!"),
490            &Str::from("Hello, w..ld!").into(),
491            MatchingOperation::Matches,
492        )
493        .unwrap();
494        assert_eq!(result, Bool::from(true).into());
495
496        let result = Str::matching_op(
497            &Str::from("Hello, world!"),
498            &Str::from("/h.*/i").into(),
499            MatchingOperation::Matches,
500        )
501        .unwrap();
502        assert_eq!(result, Bool::from(true).into());
503
504        // test the m s U u x flags
505        let result = Str::matching_op(
506            &Str::from("Hello\n, world!"),
507            &Str::from("/h.*/isUuxm").into(),
508            MatchingOperation::Matches,
509        )
510        .unwrap();
511        assert_eq!(result, Bool::from(true).into());
512
513        let result = Str::matching_op(
514            &Str::from("Hello, world!"),
515            &Str::from("/H.*/").into(),
516            MatchingOperation::Matches,
517        )
518        .unwrap();
519        assert_eq!(result, Bool::from(true).into());
520
521        let result = Str::matching_op(
522            &Str::from("Hello, world!"),
523            &Str::from("/h.*").into(),
524            MatchingOperation::Matches,
525        )
526        .unwrap();
527        assert_eq!(result, Bool::from(false).into());
528
529        Str::matching_op(
530            &Str::from("Hello, world!"),
531            &Str::from("/h.*/y").into(),
532            MatchingOperation::Matches,
533        )
534        .unwrap_err();
535    }
536
537    #[test]
538    fn test_indexing() {
539        let value_range = Array::from(vec![0, 1, 2]);
540        let value_range = Value::from(value_range);
541        let value_range = Str::index_value_to_range(&value_range).unwrap();
542        let value_range = value_range.start()..=value_range.end();
543        assert_eq!(value_range.start(), &&Value::i64(0));
544        assert_eq!(value_range.end(), &&Value::i64(2));
545        let s = Str::from("012");
546        assert_eq!(s.substr(value_range).unwrap(), "012");
547
548        // test mut_substr
549        let mut s = Str::from("012");
550        assert_eq!(s.mut_substr(&0.into()..=&1.into()).unwrap(), "01");
551
552        let s = Str::from("012");
553        assert_eq!(s.substr(&(-2).into()..=&(-1).into()).unwrap(), "12");
554
555        // normal string
556        let s = Str::from("Hello, world!");
557        assert_eq!(s.substr(&0.into()..=&1.into()).unwrap(), "He");
558
559        // Bad and scary unicode string, with multibyte chars at the start
560        let s = Str::from("👋🌎");
561        assert_eq!(s.substr(&0.into()..=&0.into()).unwrap(), "👋");
562
563        let mut s = Str::from("S👋🌎");
564        s.set_substr(&1.into()..=&1.into(), "B".into()).unwrap();
565        assert_eq!(s, "SB🌎".into());
566
567        let mut s = Str::from("S👋🌎");
568        s.set_substr(&0.into()..=&1.into(), "B".into()).unwrap();
569        assert_eq!(s, "B🌎".into());
570
571        let mut s = Str::from("S👋🌎");
572        s.set_substr(&0.into()..=&0.into(), "B".into()).unwrap();
573        assert_eq!(s, "B👋🌎".into());
574
575        let mut s = Str::from("S👋🌎");
576        s.set_substr(&2.into()..=&2.into(), "B".into()).unwrap();
577        assert_eq!(s, "S👋B".into());
578
579        let s = Str::from("S👋🌎");
580        let s = s.get_indices(&Value::from(vec![0, 2])).unwrap();
581        assert_eq!(s, "S🌎".into());
582
583        let s = Str::from("S👋🌎");
584        let s = s.get_indices(&Value::from(Vec::<Value>::new())).unwrap();
585        assert_eq!(s, "".into());
586
587        let s = Str::from("S👋🌎");
588        let s = s.get_indices(&Value::from(0..=1)).unwrap();
589        assert_eq!(s, "S👋".into());
590    }
591
592    #[test]
593    fn test_arithmetic() {
594        let result = Str::arithmetic_op(
595            Str::from("Hello, "),
596            Str::from("world!"),
597            ArithmeticOperation::Add,
598        )
599        .unwrap();
600        assert_eq!(result, Str::from("Hello, world!"));
601
602        let result = Str::arithmetic_op(
603            Str::from("Hello, world!"),
604            Str::from("d!"),
605            ArithmeticOperation::Subtract,
606        )
607        .unwrap();
608        assert_eq!(result, Str::from("Hello, worl"));
609
610        let result = Str::arithmetic_neg(Str::from("Hello, world!")).unwrap();
611        assert_eq!(result, Str::from("!dlrow ,olleH"));
612
613        // now with emojis
614        let result = Str::arithmetic_neg(Str::from("👋🌎")).unwrap();
615        assert_eq!(result, Str::from("🌎👋"));
616
617        Str::arithmetic_op(
618            Str::from("👋🌎"),
619            Str::from("🌎"),
620            ArithmeticOperation::Divide,
621        )
622        .unwrap_err();
623    }
624
625    #[test]
626    fn test_boolean_logic() {
627        let result = Str::boolean_op(
628            Str::from("Hello, "),
629            Str::from("world!"),
630            BooleanOperation::And,
631        )
632        .unwrap();
633        assert_eq!(result, Bool::from(true).into());
634
635        let result = Str::boolean_op(
636            Str::from("Hello, "),
637            Str::from("world!"),
638            BooleanOperation::Or,
639        )
640        .unwrap();
641        assert_eq!(result, Bool::from(true).into());
642
643        let result = Str::boolean_op(
644            Str::from("Hello, "),
645            Str::from("world!"),
646            BooleanOperation::LT,
647        )
648        .unwrap();
649        assert_eq!(result, Bool::from(true).into());
650
651        let result = Str::boolean_op(
652            Str::from("Hello, "),
653            Str::from("world!"),
654            BooleanOperation::GT,
655        )
656        .unwrap();
657        assert_eq!(result, Bool::from(false).into());
658
659        let result = Str::boolean_op(
660            Str::from("Hello, "),
661            Str::from("world!"),
662            BooleanOperation::LTE,
663        )
664        .unwrap();
665        assert_eq!(result, Bool::from(true).into());
666
667        let result = Str::boolean_op(
668            Str::from("Hello, "),
669            Str::from("world!"),
670            BooleanOperation::GTE,
671        )
672        .unwrap();
673        assert_eq!(result, Bool::from(false).into());
674
675        let result = Str::boolean_op(
676            Str::from("Hello, "),
677            Str::from("world!"),
678            BooleanOperation::EQ,
679        )
680        .unwrap();
681        assert_eq!(result, Bool::from(false).into());
682
683        let result = Str::boolean_op(
684            Str::from("Hello, "),
685            Str::from("world!"),
686            BooleanOperation::NEQ,
687        )
688        .unwrap();
689        assert_eq!(result, Bool::from(true).into());
690
691        let result = Str::boolean_not(Str::from("Hello, world!")).unwrap();
692        assert_eq!(result, Bool::from(false).into());
693    }
694}