aws_smt_strings/
smt_strings.rs

1// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2// SPDX-License-Identifier: Apache-2.0
3
4//!
5//! Basic string representation and operations for the SMT-LIB theory of strings
6//!
7//! In SMT-LIB, a string is a sequence of integers in the range 0x00000 .. 0x2FFFF (inclusive).
8//! This is enough to encode Unicode characters but can also include non Unicode characters.
9//!
10//! This module provide functions for constructing SMT string constants. It also implements
11//! all operations on strings defined by the SMT-LIB standard.
12//!
13//! We limit the length of string constants to [i32::MAX] and the code will panic if this bound is exceeded.
14//!
15
16#![allow(clippy::many_single_char_names)]
17#![allow(clippy::manual_range_contains)]
18#![allow(clippy::uninlined_format_args)]
19
20use crate::matcher::{naive_search, SearchResult};
21use std::slice::Iter;
22use std::{char, cmp, fmt};
23
24/// Maximal length of a string
25pub const MAX_LENGTH: i32 = i32::MAX;
26
27/// Maximal character index
28pub const MAX_CHAR: u32 = 0x2FFFF;
29
30/// Replacement for an integer not in the range [0...0x2FFFF]
31pub const REPLACEMENT_CHAR: u32 = 0xFFFD;
32
33///
34/// Internal representation of an SMT string
35/// - s = a vector of SMT character. Each character is a 32bit unsigned integer
36/// - maximal length = 2^31-1
37///
38#[derive(Debug, PartialEq, Eq, Clone, Hash)]
39pub struct SmtString {
40    s: Vec<u32>,
41}
42
43/// The empty string
44pub const EMPTY: SmtString = SmtString { s: vec![] };
45
46impl SmtString {
47    // internal construct
48    fn make(a: Vec<u32>) -> SmtString {
49        let n = a.len();
50        if n > MAX_LENGTH as usize {
51            panic!(
52                "Cannot construct a string of length {}: max length is {}",
53                n, MAX_LENGTH,
54            );
55        }
56        SmtString { s: a }
57    }
58
59    // construct from a slice
60    fn make_from_slice(a: &[u32]) -> SmtString {
61        Self::make(a.to_vec())
62    }
63
64    /// Check whether this is a good string
65    pub fn is_good(&self) -> bool {
66        let n = self.s.len();
67        n < MAX_LENGTH as usize && good_string(&self.s)
68    }
69
70    /// Check whether all SMT chars are valid unicode
71    pub fn is_unicode(&self) -> bool {
72        all_unicode(&self.s)
73    }
74
75    /// Convert to a regular string
76    /// - any char that's not a valid unicode code point is replaced by U+FFFD
77    pub fn to_unicode_string(&self) -> String {
78        map_to_unicode(&self.s)
79    }
80
81    /// Get the length
82    pub fn len(&self) -> usize {
83        self.s.len()
84    }
85
86    /// Check whether the string is empty
87    pub fn is_empty(&self) -> bool {
88        self.s.is_empty()
89    }
90
91    /// Get character at index i.
92    /// panic if i is out of range
93    pub fn char(&self, i: usize) -> u32 {
94        self.s[i]
95    }
96
97    /// Iterator
98    pub fn iter(&self) -> Iter<'_, u32> {
99        self.s.iter()
100    }
101}
102
103impl AsRef<[u32]> for SmtString {
104    fn as_ref(&self) -> &[u32] {
105        self.s.as_ref()
106    }
107}
108
109///
110/// Construct an SmtString from a UTF8 string x
111///
112/// # Example
113/// ```
114/// use aws_smt_strings::smt_strings::*;
115///
116/// assert_eq!(SmtString::from(""), EMPTY);
117/// assert_eq!(SmtString::from("\u{0331}"), SmtString::from(0x331));
118/// ```
119impl From<&str> for SmtString {
120    fn from(x: &str) -> Self {
121        SmtString::make(x.chars().map(|c| c as u32).collect())
122    }
123}
124
125impl From<String> for SmtString {
126    fn from(x: String) -> Self {
127        SmtString::from(x.as_str())
128    }
129}
130
131///
132/// Construct an SmtString from a slice or array of integers.
133///
134/// Any element of a not in the range [0, 0x2ffff] is replaced by 0xfffd.
135///
136/// # Example
137/// ```
138/// use aws_smt_strings::smt_strings::*;
139///
140/// assert_eq!(SmtString::from(&[97, 98, 99]), SmtString::from("abc"));
141/// ```
142impl From<&[u32]> for SmtString {
143    fn from(a: &[u32]) -> Self {
144        SmtString::make(
145            a.iter()
146                .map(|&x| if x <= MAX_CHAR { x } else { REPLACEMENT_CHAR })
147                .collect(),
148        )
149    }
150}
151
152impl<const N: usize> From<&[u32; N]> for SmtString {
153    fn from(a: &[u32; N]) -> Self {
154        a[..].into()
155    }
156}
157
158impl From<Vec<u32>> for SmtString {
159    fn from(a: Vec<u32>) -> Self {
160        if a.iter().all(|&x| x <= MAX_CHAR) {
161            SmtString::make(a)
162        } else {
163            a[..].into()
164        }
165    }
166}
167
168///
169/// Construct a single-character string from integer x.
170///
171/// Convert x to 0xfffd if it's not a valid character (i.e., if x is not in the range [0, 0x2ffff])
172///
173impl From<u32> for SmtString {
174    fn from(x: u32) -> Self {
175        let x = if x <= MAX_CHAR { x } else { REPLACEMENT_CHAR };
176        SmtString::make(vec![x])
177    }
178}
179
180///
181/// Construct a single-character string from character x.
182///
183impl From<char> for SmtString {
184    fn from(x: char) -> SmtString {
185        SmtString::make(vec![x as u32])
186    }
187}
188
189// State machine for parsing SMT-LIB literals
190#[derive(Debug, PartialEq, Eq)]
191enum State {
192    Init,
193    AfterSlash,
194    AfterSlashU,
195    AfterSlashUHex,
196    AfterSlashUBrace,
197}
198
199#[derive(Debug, PartialEq, Eq)]
200struct ParsingAutomaton {
201    state: State,
202    // SMT string under construction
203    string_so_far: Vec<u32>,
204    // pending + pending_idx form an escape prefix
205    // (e.g., something like "\u3C") is stored in pending[0 .. 3] and pending_idx = 4
206    pending: [u32; 9],
207    pending_idx: usize,
208    // escape code = value of the escape prefix
209    escape_code: u32,
210}
211
212fn new_automaton() -> ParsingAutomaton {
213    ParsingAutomaton {
214        state: State::Init,
215        string_so_far: Vec::new(),
216        pending: [0; 9],
217        pending_idx: 0,
218        escape_code: 0,
219    }
220}
221
222impl ParsingAutomaton {
223    // add char x to the string so far
224    fn push(&mut self, x: char) {
225        self.string_so_far.push(x as u32);
226    }
227
228    // add char x to the pending array
229    fn pending(&mut self, x: char) {
230        let i = self.pending_idx;
231        assert!(i < 9);
232        self.pending[i] = x as u32;
233        self.pending_idx += 1;
234    }
235
236    // consume character x in the Init state
237    fn consume(&mut self, x: char) {
238        if x == '\\' {
239            self.pending(x);
240            self.state = State::AfterSlash;
241        } else {
242            self.push(x);
243        }
244    }
245
246    // add all pending characters to the string so far
247    // then reset state to INIT
248    fn flush_pending(&mut self) {
249        let pending = &self.pending[0..self.pending_idx];
250        self.string_so_far.extend_from_slice(pending);
251        self.pending_idx = 0;
252        self.escape_code = 0;
253        self.state = State::Init;
254    }
255
256    // close the escape sequence
257    fn close_escape_seq(&mut self) {
258        self.string_so_far.push(self.escape_code);
259        self.pending_idx = 0;
260        self.escape_code = 0;
261        self.state = State::Init;
262    }
263
264    // add hexadecimal character x to the pending array
265    // and update the escape_code
266    fn add_hex(&mut self, x: char) {
267        let hex = x.to_digit(16).unwrap();
268        self.escape_code = self.escape_code << 4 | hex;
269        self.pending(x);
270    }
271
272    // accept a new character x
273    fn accept(&mut self, x: char) {
274        match self.state {
275            State::Init => {
276                self.consume(x);
277            }
278            State::AfterSlash => {
279                if x == 'u' {
280                    self.pending(x);
281                    self.state = State::AfterSlashU;
282                } else {
283                    self.flush_pending();
284                    self.consume(x);
285                }
286            }
287            State::AfterSlashU => {
288                if x == '{' {
289                    self.pending(x);
290                    self.state = State::AfterSlashUBrace;
291                } else if x.is_ascii_hexdigit() {
292                    self.add_hex(x);
293                    self.state = State::AfterSlashUHex;
294                } else {
295                    self.flush_pending();
296                    self.consume(x);
297                }
298            }
299            State::AfterSlashUBrace => {
300                // '}' terminates the escape sequence if we have at least one hex digit
301                // (i.e.. pending_idx > 3) and the escape_code is no more than 0x2FFFF
302                if x == '}' && self.pending_idx > 3 && self.escape_code <= MAX_CHAR {
303                    self.close_escape_seq();
304                } else if x.is_ascii_hexdigit() && self.pending_idx < 8 {
305                    // can't have more than 5 hex digit after '\u{'
306                    self.add_hex(x);
307                } else {
308                    self.flush_pending();
309                    self.consume(x);
310                }
311            }
312            State::AfterSlashUHex => {
313                // end the escape sequence exactly four hex digits after '\u'
314                if x.is_ascii_hexdigit() {
315                    self.add_hex(x);
316                    if self.pending_idx == 6 {
317                        self.close_escape_seq();
318                    }
319                } else {
320                    self.flush_pending();
321                    self.consume(x);
322                }
323            }
324        }
325    }
326}
327
328///
329/// Parse a string literal in the SMT-LIB syntax and
330/// convert it to an SmtString.
331///
332/// # Example
333/// ```
334/// use aws_smt_strings::smt_strings::*;
335///
336/// assert_eq!(parse_smt_literal("abc"), SmtString::from(&[97, 98, 99]));
337/// assert_eq!(parse_smt_literal(r"a\u0000C"), SmtString::from(&[97, 0, 67]));
338///
339/// // bad escape sequences are copied verbatim
340/// assert_eq!(parse_smt_literal(r"\u2CA"), SmtString::from(&[92, 117, 50, 67, 65]));
341/// ```
342///
343pub fn parse_smt_literal(a: &str) -> SmtString {
344    let mut parser = new_automaton();
345    for x in a.chars() {
346        parser.accept(x);
347    }
348    parser.flush_pending();
349    SmtString::make(parser.string_so_far)
350}
351
352///
353/// Check whether an integer is a valid SMT character
354///
355/// This checks whether integer `x` is less than or equal to [MAX_CHAR].
356///
357pub fn good_char(x: u32) -> bool {
358    x <= MAX_CHAR
359}
360
361///
362/// Check whether all elements in a slice are valid SMT characters
363///
364/// Return true if all integers in `a` are less than or equal to [MAX_CHAR].
365///
366pub fn good_string(a: &[u32]) -> bool {
367    a.iter().all(|&x| x <= MAX_CHAR)
368}
369
370///
371/// Convert an SMT character to a string literal
372///
373pub fn smt_char_as_string(x: u32) -> String {
374    if x == '"' as u32 {
375        "\"\"".to_string()
376    } else if x >= 32 && x < 127 {
377        char::from_u32(x).unwrap().to_string()
378    } else if x < 32 || x == 127 {
379        format!("\\u{{{:02x}}}", x)
380    } else if x < 0x10000 {
381        format!("\\u{:04x}", x)
382    } else {
383        format!("\\u{{{:x}}}", x)
384    }
385}
386
387// Convert to an ASCII string in the SMT syntax
388// - use escape sequences for non-printable ASCII characters and non-ASCII characters
389// - convert "  to ""
390impl fmt::Display for SmtString {
391    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
392        write!(f, "\"")?;
393        for &x in self.s.iter() {
394            if x == '"' as u32 {
395                write!(f, "\"\"")?;
396            } else if x >= 32 && x < 127 {
397                write!(f, "{}", char::from_u32(x).unwrap())?;
398            } else if x < 32 || x == 127 {
399                write!(f, "\\u{{{:02x}}}", x)?;
400            } else if x < 0x10000 {
401                write!(f, "\\u{:04x}", x)?;
402            } else {
403                write!(f, "\\u{{{:x}}}", x)?;
404            }
405        }
406        write!(f, "\"")
407    }
408}
409
410///
411/// Convert integer x (interpreted as a Unicode codepoint) to a string in the SMT syntax:
412///
413/// 1) printable ASCII characters (other than double quote) are unchanged
414/// 2) a double quote is converted to two double quotes
415/// 3) non-printable ASCII characters are converted to "\u{xx}" (two hexadecimal digits)
416/// 4) other characters are printed as "\uxxxx" or "\u{xxxxx}"
417/// 5) if x is outside the valid SMT range (i.e., x > 0x2FFFF) then it's
418///    converted to the non-SMT compliant string \u{xxxxxx} with as many hexadecimal
419///    digits as required.
420///
421pub fn char_to_smt(x: u32) -> String {
422    if x == '"' as u32 {
423        "\"\"".to_string()
424    } else if x >= 32 && x < 127 {
425        char::from_u32(x).unwrap().to_string()
426    } else if x < 32 || x == 127 {
427        format!("\\u{{{:02x}}}", x)
428    } else if x < 0x10000 {
429        format!("\\u{:04x}", x)
430    } else {
431        format!("\\u{{{:x}}}", x)
432    }
433}
434
435#[allow(dead_code)]
436fn hex(i: u32) -> char {
437    char::from_digit(i & 0xF, 16).unwrap()
438}
439
440#[allow(dead_code)]
441fn append_smt_char(mut s: String, x: u32) -> String {
442    if x == '"' as u32 {
443        s.push_str("\"\"")
444    } else if x >= 32 && x < 127 {
445        s.push(char::from_u32(x).unwrap())
446    } else if x < 32 {
447        s.push_str("\\u{");
448        s.push(hex(x >> 4));
449        s.push(hex(x));
450        s.push('}');
451    } else if x < 0x10000 {
452        s.push_str("\\u");
453        s.push(hex(x >> 12));
454        s.push(hex(x >> 8));
455        s.push(hex(x >> 4));
456        s.push(hex(x));
457    } else {
458        s.push_str("\\u{");
459        s.push(hex(x >> 16));
460        s.push(hex(x >> 12));
461        s.push(hex(x >> 8));
462        s.push(hex(x >> 4));
463        s.push(hex(x));
464        s.push('}');
465    };
466    s
467}
468
469// INTERNAL OPERATIONS ON VECTORS/SLICES
470
471// Check whether all elements of v are valid code points
472fn all_unicode(v: &[u32]) -> bool {
473    v.iter().all(|&x| char::from_u32(x).is_some())
474}
475
476// Map to a regular string. If an element is not valid Unicode, it's replaced by U+FFFD
477fn map_to_unicode(v: &[u32]) -> String {
478    v.iter()
479        .map(|&x| char::from_u32(x).unwrap_or(char::REPLACEMENT_CHARACTER))
480        .collect()
481}
482
483// Check whether x is a decimal digit
484fn char_is_digit(x: u32) -> bool {
485    x >= '0' as u32 && x <= '9' as u32
486}
487
488// Check whether v < w in the lexicographic ordering
489fn vector_lt(v: &[u32], w: &[u32]) -> bool {
490    let max = cmp::min(v.len(), w.len());
491    let mut i = 0;
492    while i < max && v[i] == w[i] {
493        i += 1;
494    }
495    if i == max {
496        v.len() < w.len()
497    } else {
498        v[i] < w[i]
499    }
500}
501
502// Check whether v <= w in the lexicographic ordering
503fn vector_le(v: &[u32], w: &[u32]) -> bool {
504    let max = cmp::min(v.len(), w.len());
505    let mut i = 0;
506    while i < max && v[i] == w[i] {
507        i += 1;
508    }
509    if i == max {
510        v.len() <= w.len()
511    } else {
512        v[i] < w[i]
513    }
514}
515
516// Check whether v is a prefix of w
517fn vector_prefix(v: &[u32], w: &[u32]) -> bool {
518    let n = v.len();
519    if n <= w.len() {
520        let mut i = 0;
521        while i < n && v[i] == w[i] {
522            i += 1;
523        }
524        i == n
525    } else {
526        false // v is longer than w
527    }
528}
529
530// Check whether v is a suffix of w
531fn vector_suffix(v: &[u32], w: &[u32]) -> bool {
532    let n = v.len();
533    let m = w.len();
534    if n <= m {
535        let k = m - n;
536        let mut i = 0;
537        while i < n && v[i] == w[i + k] {
538            i += 1;
539        }
540        i == n
541    } else {
542        false
543    }
544}
545
546// Concatenate v and w
547fn vector_concat(v: &[u32], w: &[u32]) -> Vec<u32> {
548    let mut x = Vec::new();
549    x.extend_from_slice(v);
550    x.extend_from_slice(w);
551    x
552}
553
554// find the first occurrence of v in w, starting at index i
555fn find_sub_vector(v: &[u32], w: &[u32], i: usize) -> SearchResult {
556    naive_search(v, w, i)
557}
558
559// SMT-LIKE FUNCTIONS ON STRINGS
560
561///
562/// Concatenate two strings
563///
564/// # Panics
565///
566/// If the resulting strings would have length larger than [MAX_LENGTH]
567///
568/// # Example
569/// ```
570/// use aws_smt_strings::smt_strings::*;
571///
572/// let s1 = SmtString::from("aa");
573/// let s2 = SmtString::from("bb");
574/// assert_eq!(str_concat(&s1,&s2), SmtString::from("aabb"));
575/// ```
576///
577pub fn str_concat(s1: &SmtString, s2: &SmtString) -> SmtString {
578    SmtString::make(vector_concat(&s1.s, &s2.s))
579}
580
581///
582/// String length
583///
584/// # Example
585/// ```
586/// use aws_smt_strings::smt_strings::*;
587///
588/// assert_eq!(str_len(&EMPTY), 0);
589/// assert_eq!(str_len(&SmtString::from("abc")), 3);
590/// ```
591pub fn str_len(s: &SmtString) -> i32 {
592    s.len() as i32
593}
594
595///
596/// Character at index i in s, converted to a single-character string
597///
598/// - Characters are indexed from 0 to s.len - 1.
599/// - Return the empty string if i is not in this range.
600///
601/// # Example
602/// ```
603/// use aws_smt_strings::smt_strings::*;
604///
605/// let s = SmtString::from("abcd");
606///
607/// assert_eq!(str_at(&s, 2), SmtString::from('c'));
608/// assert_eq!(str_at(&s, 4), EMPTY);
609/// ```
610pub fn str_at(s: &SmtString, i: i32) -> SmtString {
611    if i < 0 || i >= s.len() as i32 {
612        EMPTY
613    } else {
614        SmtString::from(s.s[i as usize])
615    }
616}
617
618///
619/// Substring of s that starts at index i and has length at most n
620///
621/// - Return the empty string if n < 0 or i is not in [0 .. s.len-1]
622/// - If s has length at least i+n, return the substring s[i...i+n-1]
623/// - If s has length less than i+n, return the substring s[i .. s.len-1]
624///
625/// # Example
626/// ```
627/// use aws_smt_strings::smt_strings::*;
628///
629/// let s = SmtString::from("abcdef");
630///
631/// assert_eq!(str_substr(&s, 2, 3), SmtString::from("cde"));
632/// ```
633pub fn str_substr(s: &SmtString, i: i32, n: i32) -> SmtString {
634    if i < 0 || i >= s.len() as i32 || n <= 0 {
635        EMPTY
636    } else {
637        let i = i as usize;
638        let n = n as usize;
639        let j = cmp::min(i + n, s.s.len());
640        SmtString::make_from_slice(s.s.get(i..j).unwrap())
641    }
642}
643
644///
645/// Strict lexicographic comparison.
646///
647/// - Return true if s1 < s2 in the lexicographic ordering
648///
649/// # Examples
650/// ```
651/// use aws_smt_strings::smt_strings::*;
652///
653/// let s1 = SmtString::from("abcdef");
654/// let s2 = SmtString::from("abcd");
655/// let s3 = SmtString::from("bbb");
656///
657/// assert!(str_lt(&s2, &s1));
658/// assert!(str_lt(&s1, &s3));
659/// assert!(str_lt(&EMPTY, &s3));
660///
661/// assert!(! str_lt(&s1, &s2));
662/// assert!(! str_lt(&s2, &s2));
663/// assert!(! str_lt(&s2, &EMPTY));
664/// ```
665pub fn str_lt(s1: &SmtString, s2: &SmtString) -> bool {
666    vector_lt(&s1.s, &s2.s)
667}
668
669///
670/// Lexicographic comparison.
671///
672/// - Return true if s1 <= s2 in the lexicographic ordering
673///
674/// # Examples
675/// ```
676/// use aws_smt_strings::smt_strings::*;
677///
678/// let s1 = SmtString::from("abcdef");
679/// let s2 = SmtString::from("abcd");
680/// let s3 = SmtString::from("bbb");
681///
682/// assert!(str_le(&s2, &s1));
683/// assert!(str_le(&s1, &s3));
684/// assert!(str_le(&s2, &s2));
685/// assert!(str_le(&EMPTY, &s3));
686///
687/// assert!(! str_le(&s1, &s2));
688/// assert!(! str_le(&s2, &EMPTY));
689/// ```
690pub fn str_le(s1: &SmtString, s2: &SmtString) -> bool {
691    vector_le(&s1.s, &s2.s)
692}
693
694///
695/// Check whether s1 is a prefix of s2
696///
697/// # Examples
698/// ```
699/// use aws_smt_strings::smt_strings::*;
700///
701/// let s1 = SmtString::from("abcdef");
702/// let s2 = SmtString::from("abcd");
703/// let s3 = SmtString::from("bbb");
704///
705/// assert!(str_prefixof(&s2, &s1));
706/// assert!(str_prefixof(&s1, &s1));
707/// assert!(str_prefixof(&EMPTY, &s3));
708///
709/// assert!(! str_prefixof(&s1, &s2));
710/// ```
711pub fn str_prefixof(s1: &SmtString, s2: &SmtString) -> bool {
712    vector_prefix(&s1.s, &s2.s)
713}
714
715///
716/// Check whether s1 is a suffix of s2
717///
718/// # Examples
719/// ```
720/// use aws_smt_strings::smt_strings::*;
721///
722/// let s1 = SmtString::from("abcdef");
723/// let s2 = SmtString::from("def");
724/// let s3 = SmtString::from("bbb");
725///
726/// assert!(str_suffixof(&s2, &s1));
727/// assert!(str_suffixof(&s1, &s1));
728/// assert!(str_suffixof(&EMPTY, &s3));
729///
730/// assert!(! str_suffixof(&s3, &s1));
731/// ```
732pub fn str_suffixof(s1: &SmtString, s2: &SmtString) -> bool {
733    vector_suffix(&s1.s, &s2.s)
734}
735
736///
737/// Check whether s2 is a substring of s1
738///
739/// # Examples
740/// ```
741/// use aws_smt_strings::smt_strings::*;
742///
743/// let s1 = SmtString::from("abcdef");
744/// let s2 = SmtString::from("cde");
745/// let s3 = SmtString::from("cdd");
746///
747/// assert!(str_contains(&s1, &s2));;
748/// assert!(str_contains(&s3, &EMPTY));
749///
750/// assert!(! str_contains(&s2, &s1));
751/// assert!(! str_contains(&s1, &s3));
752/// ```
753pub fn str_contains(s1: &SmtString, s2: &SmtString) -> bool {
754    match find_sub_vector(&s2.s, &s1.s, 0) {
755        SearchResult::NotFound => false,
756        SearchResult::Found(..) => true,
757    }
758}
759
760///
761/// Index of the first occurrence of s2 in s1, starting from index i
762///
763/// - If 0 <= i < s1.len and s2 occurs in s1[i ..] then return the index j >= i
764///   of the first occurrence of s2 in s1[i ..].
765/// - Return -1 if i < 0 or i >= s1.len or s2 does not occur in s1[i ..]
766///
767/// # Examples
768/// ```
769/// use aws_smt_strings::smt_strings::*;
770///
771/// let s1 = SmtString::from("abcdef");
772/// let s2 = SmtString::from("cde");
773/// let s3 = SmtString::from("cdd");
774///
775/// assert_eq!(str_indexof(&s1, &s2, 0), 2);
776/// assert_eq!(str_indexof(&s1, &s2, 2), 2);
777/// assert_eq!(str_indexof(&s1, &s2, 3), -1);
778/// assert_eq!(str_indexof(&s1, &s3, 0), -1);
779/// ```
780///
781pub fn str_indexof(s1: &SmtString, s2: &SmtString, i: i32) -> i32 {
782    if i < 0 || i >= s1.len() as i32 {
783        -1
784    } else {
785        match find_sub_vector(&s2.s, &s1.s, i as usize) {
786            SearchResult::NotFound => -1,
787            SearchResult::Found(k, _) => k as i32,
788        }
789    }
790}
791
792///
793/// Replace the first occurrence of p in s with r.
794///
795/// - Return s unchanged if p does not occur in s
796///
797/// # Panics
798///
799/// If the resulting string would have length larger than [MAX_LENGTH]
800///
801/// # Examples
802/// ```
803/// use aws_smt_strings::smt_strings::*;
804///
805/// let s1 = SmtString::from("abcdef");
806/// let s2 = SmtString::from("cde");
807/// let s3 = SmtString::from("Z");
808///
809/// assert_eq!(str_replace(&s1, &s2, &s3), SmtString::from("abZf"));
810/// assert_eq!(str_replace(&s1, &EMPTY, &s3), SmtString::from("Zabcdef"));
811/// ```
812pub fn str_replace(s: &SmtString, p: &SmtString, r: &SmtString) -> SmtString {
813    let s = &s.s;
814    let p = &p.s;
815    let r = &r.s;
816
817    match find_sub_vector(p, s, 0) {
818        SearchResult::NotFound => SmtString::make_from_slice(s),
819        SearchResult::Found(i, j) => {
820            let mut x = Vec::new();
821            x.extend_from_slice(&s[..i]);
822            x.extend_from_slice(r);
823            x.extend_from_slice(&s[j..]);
824            SmtString::make(x)
825        }
826    }
827}
828
829///
830/// Replace all occurrences of p in s with r.
831///
832/// - return s unchanged if p is the empty string or if p doesn't occur in s
833///
834/// # Panics
835///
836/// If the resulting string would have length larger than [MAX_LENGTH]
837///
838/// # Examples
839/// ```
840/// use aws_smt_strings::smt_strings::*;
841///
842/// let s1 = SmtString::from("abcdcdef");
843/// let s2 = SmtString::from("cd");
844/// let s3 = SmtString::from("Z");
845///
846/// assert_eq!(str_replace_all(&s1, &s2, &s3), SmtString::from("abZZef"));
847/// assert_eq!(str_replace_all(&s1, &EMPTY, &s2), s1);
848/// ```
849pub fn str_replace_all(s: &SmtString, p: &SmtString, r: &SmtString) -> SmtString {
850    if p.is_empty() {
851        SmtString::make_from_slice(&s.s)
852    } else {
853        let s = &s.s;
854        let p = &p.s;
855        let r = &r.s;
856        let mut x = Vec::new();
857        let mut i = 0;
858        while let SearchResult::Found(j, k) = find_sub_vector(p, s, i) {
859            // s[j .. k] == p
860            x.extend_from_slice(&s[i..j]);
861            x.extend_from_slice(r);
862            i = k;
863        }
864        x.extend_from_slice(&s[i..]);
865        SmtString::make(x)
866    }
867}
868
869///
870/// Check whether s contains a single digit between '0' and '9'
871///
872/// # Examples
873/// ```
874/// use aws_smt_strings::smt_strings::*;
875///
876/// assert!(str_is_digit(&SmtString::from("0")));
877/// assert!(! str_is_digit(&SmtString::from("A")));
878/// ```
879pub fn str_is_digit(s: &SmtString) -> bool {
880    s.len() == 1 && char_is_digit(s.s[0])
881}
882
883///
884/// Code for s if s is a single-character string, -1 otherwise
885///
886/// # Examples
887/// ```
888/// use aws_smt_strings::smt_strings::*;
889///
890/// assert_eq!(str_to_code(&EMPTY), -1);
891/// assert_eq!(str_to_code(&SmtString::from(1202)), 1202);
892/// assert_eq!(str_to_code(&SmtString::from("abc")), -1);
893/// ```
894pub fn str_to_code(s: &SmtString) -> i32 {
895    if s.len() == 1 {
896        s.s[0] as i32
897    } else {
898        -1
899    }
900}
901
902///
903/// Single-character string with character code x
904///
905/// - Return a single-character string if 0 <= x <= 0x2ffff
906/// - Return the empty string otherwise
907///
908/// # Examples
909/// ```
910/// use aws_smt_strings::smt_strings::*;
911///
912/// assert_eq!(str_from_code(-19), EMPTY);
913/// assert_eq!(str_from_code(1202), SmtString::from(1202));
914/// assert_eq!(str_from_code(0x30000), EMPTY);
915/// ```
916pub fn str_from_code(x: i32) -> SmtString {
917    if 0 <= x && x <= MAX_CHAR as i32 {
918        SmtString::from(x as u32)
919    } else {
920        EMPTY
921    }
922}
923
924///
925/// Convert s to an integer
926///
927/// - Return a non-negative integer if s consists entirely of decimal digits.
928/// - Return -1 otherwise
929///
930/// # Panics
931///
932/// If the result is too large and does not fit in `i32`.
933///
934/// # Examples
935/// ```
936/// use aws_smt_strings::smt_strings::*;
937///
938/// assert_eq!(str_to_int(&SmtString::from("00982")), 982);
939/// assert_eq!(str_to_int(&EMPTY), -1);
940/// assert_eq!(str_to_int(&SmtString::from("101aaabb")), -1);
941/// ```
942pub fn str_to_int(s: &SmtString) -> i32 {
943    if s.is_empty() {
944        return -1;
945    }
946
947    let mut x: i32 = 0;
948    for &d in &s.s {
949        if char_is_digit(d) {
950            let y = 10 * x + (d as i32 - '0' as i32);
951            if y < x {
952                panic!("Arithmetic overflow in str_to_int");
953            }
954            x = y;
955        } else {
956            return -1;
957        }
958    }
959    x
960}
961
962///
963/// Convert x to a string
964///
965/// - Return a sequence of digits if x >= 0
966/// - Return the empty string if x < 0
967///
968/// # Examples
969/// ```
970/// use aws_smt_strings::smt_strings::*;
971///
972/// assert_eq!(str_from_int(0), SmtString::from("0"));
973/// assert_eq!(str_from_int(-1), EMPTY);
974/// assert_eq!(str_from_int(1002), SmtString::from("1002"));
975/// ```
976pub fn str_from_int(x: i32) -> SmtString {
977    if x >= 0 {
978        SmtString::from(x.to_string())
979    } else {
980        EMPTY
981    }
982}
983
984#[cfg(test)]
985mod tests {
986    use super::*;
987
988    #[test]
989    fn test_constructors() {
990        assert_eq!(EMPTY, "".into());
991        assert_eq!(SmtString::from("ABCD"), SmtString::from(&[65, 66, 67, 68]));
992        assert_eq!(
993            SmtString::from("AB\u{12ff}D"),
994            SmtString::from(&[65, 66, 0x12FF, 68])
995        );
996        assert_eq!(SmtString::from(0x1300), SmtString::from(&[0x1300u32]));
997        assert_eq!(SmtString::from(0x30000), SmtString::from(&[0xFFFD]));
998        assert_eq!(SmtString::from('K'), SmtString::from(&[75]));
999        assert_eq!(
1000            SmtString::from(&[0x0, 0x100, 0x2FFFF, 0x30000, 0x40000]),
1001            SmtString::from(&[0, 256, 0x2FFFF, 0xFFFD, 0xFFFD])
1002        );
1003    }
1004
1005    #[test]
1006    fn test_parsing() {
1007        assert_eq!(parse_smt_literal(""), EMPTY);
1008        assert_eq!(
1009            parse_smt_literal("A\"BB"),
1010            SmtString::from(&[65, 34, 66, 66])
1011        );
1012        assert_eq!(
1013            parse_smt_literal("abcd"),
1014            SmtString::from(&[97, 98, 99, 100])
1015        );
1016        assert_eq!(parse_smt_literal(r"\u{1aB6e}"), SmtString::from(0x1AB6E));
1017
1018        assert_eq!(
1019            parse_smt_literal(r"\u2CA"),
1020            SmtString::from(&[92, 117, 50, 67, 65])
1021        );
1022        assert_eq!(
1023            parse_smt_literal(r"\u{ACG}A"),
1024            SmtString::from(&[92, 117, 123, 65, 67, 71, 125, 65])
1025        );
1026        assert_eq!(
1027            parse_smt_literal(r"\u{}"),
1028            SmtString::from(&[92, 117, 123, 125])
1029        );
1030        assert_eq!(
1031            parse_smt_literal(r"\u{3ffff}"),
1032            SmtString::from(&[92, 117, 123, 51, 102, 102, 102, 102, 125])
1033        );
1034        assert_eq!(
1035            parse_smt_literal(r"\u{\u{09}"),
1036            SmtString::from(&[92, 117, 123, 9])
1037        );
1038        assert_eq!(
1039            parse_smt_literal(r"\u\u{09}"),
1040            SmtString::from(&[92, 117, 9])
1041        );
1042        assert_eq!(parse_smt_literal(r"\\u{09}"), SmtString::from(&[92, 9]));
1043    }
1044
1045    #[test]
1046    fn test_format() {
1047        assert_eq!(EMPTY.to_string(), r#""""#);
1048        assert_eq!(SmtString::from(&[65, 34, 66]).to_string(), r#""A""B""#);
1049        assert_eq!(SmtString::from("abcd").to_string(), r#""abcd""#);
1050        assert_eq!(
1051            parse_smt_literal(r"\u{1aB6e}").to_string(),
1052            r#""\u{1ab6e}""#
1053        );
1054        assert_eq!(parse_smt_literal(r"\u{12DD}").to_string(), r#""\u12dd""#);
1055        assert_eq!(SmtString::from(0).to_string(), r#""\u{00}""#);
1056    }
1057
1058    #[test]
1059    fn test_concat() {
1060        let s1 = SmtString::from("abcd");
1061        let s2 = SmtString::from("efg");
1062        assert_eq!(str_concat(&s1, &s2), SmtString::from("abcdefg"));
1063        assert_eq!(str_concat(&s1, &EMPTY), s1);
1064        assert_eq!(str_concat(&EMPTY, &s2), s2);
1065        assert_eq!(str_concat(&EMPTY, &EMPTY), EMPTY);
1066    }
1067
1068    #[test]
1069    fn test_length() {
1070        let s1 = SmtString::from("abcd");
1071        let s2 = SmtString::from("\u{01dd}");
1072        assert_eq!(str_len(&s1), 4);
1073        assert_eq!(str_len(&s2), 1);
1074        assert_eq!(str_len(&EMPTY), 0);
1075    }
1076
1077    #[test]
1078    fn test_at() {
1079        let s = SmtString::from("abcde");
1080        assert_eq!(str_at(&s, 0), SmtString::from('a'));
1081        assert_eq!(str_at(&s, 2), SmtString::from('c'));
1082        assert_eq!(str_at(&s, 4), SmtString::from('e'));
1083        assert_eq!(str_at(&s, 5), EMPTY);
1084        assert_eq!(str_at(&s, -1), EMPTY);
1085        assert_eq!(str_at(&EMPTY, 0), EMPTY);
1086    }
1087
1088    #[test]
1089    fn test_substr() {
1090        let s = SmtString::from("abcdef");
1091        assert_eq!(str_substr(&s, 2, 3), SmtString::from("cde"));
1092        assert_eq!(str_substr(&s, 0, str_len(&s)), s);
1093        assert_eq!(str_substr(&s, 2, 10), SmtString::from("cdef"));
1094        assert_eq!(str_substr(&s, 2, 0), EMPTY);
1095        assert_eq!(str_substr(&s, 6, 4), EMPTY);
1096    }
1097
1098    #[test]
1099    fn test_lexorder() {
1100        let s1 = SmtString::from("abcdef");
1101        let s2 = SmtString::from("abcd");
1102        let s3 = SmtString::from("bbb");
1103
1104        assert!(str_lt(&s2, &s1));
1105        assert!(str_lt(&s1, &s3));
1106        assert!(str_lt(&EMPTY, &s3));
1107
1108        assert!(!str_lt(&s1, &s2));
1109        assert!(!str_lt(&s2, &s2));
1110        assert!(!str_lt(&s2, &EMPTY));
1111        assert!(!str_lt(&EMPTY, &EMPTY));
1112
1113        assert!(str_le(&s2, &s1));
1114        assert!(str_le(&s1, &s3));
1115        assert!(str_le(&s2, &s2));
1116        assert!(str_le(&EMPTY, &s3));
1117        assert!(str_le(&EMPTY, &EMPTY));
1118
1119        assert!(!str_le(&s1, &s2));
1120        assert!(!str_le(&s2, &EMPTY));
1121    }
1122
1123    #[test]
1124    fn test_substrings() {
1125        let s1 = SmtString::from("abcdef");
1126        let s2 = SmtString::from("abcd");
1127        let s3 = SmtString::from("bbb");
1128        let s4 = SmtString::from("def");
1129        let s5 = SmtString::from("bc");
1130
1131        assert!(str_prefixof(&s2, &s1));
1132        assert!(str_prefixof(&s1, &s1));
1133        assert!(str_prefixof(&EMPTY, &s3));
1134        assert!(str_prefixof(&EMPTY, &EMPTY));
1135
1136        assert!(!str_prefixof(&s1, &s2));
1137        assert!(!str_prefixof(&s3, &s1));
1138        assert!(!str_prefixof(&s1, &EMPTY));
1139        assert!(!str_prefixof(&s5, &s1));
1140
1141        assert!(str_suffixof(&s4, &s1));
1142        assert!(str_suffixof(&s1, &s1));
1143        assert!(str_suffixof(&EMPTY, &s3));
1144        assert!(str_suffixof(&EMPTY, &EMPTY));
1145
1146        assert!(!str_suffixof(&s1, &s2));
1147        assert!(!str_suffixof(&s3, &s1));
1148        assert!(!str_suffixof(&s1, &EMPTY));
1149        assert!(!str_suffixof(&s5, &s1));
1150
1151        assert!(str_contains(&s1, &s2));
1152        assert!(str_contains(&s1, &s1));
1153        assert!(str_contains(&s1, &s4));
1154        assert!(str_contains(&s1, &s5));
1155        assert!(str_contains(&s3, &EMPTY));
1156        assert!(str_contains(&EMPTY, &EMPTY));
1157
1158        assert!(!str_contains(&s2, &s1));
1159        assert!(!str_contains(&s1, &s3));
1160        assert!(!str_contains(&EMPTY, &s1));
1161    }
1162
1163    #[test]
1164    fn test_indexof() {
1165        let s1 = SmtString::from("abcdef");
1166        let s2 = SmtString::from("cde");
1167        let s3 = SmtString::from("cdd");
1168
1169        assert_eq!(str_indexof(&s1, &s2, 0), 2);
1170        assert_eq!(str_indexof(&s1, &s2, 2), 2);
1171        assert_eq!(str_indexof(&s1, &s2, 3), -1);
1172        assert_eq!(str_indexof(&s1, &s1, 0), 0);
1173        assert_eq!(str_indexof(&s1, &EMPTY, 4), 4);
1174        assert_eq!(str_indexof(&s1, &s3, 0), -1);
1175        assert_eq!(str_indexof(&s1, &s2, -10), -1);
1176        assert_eq!(str_indexof(&s1, &s1, 1), -1);
1177        assert_eq!(str_indexof(&EMPTY, &s1, 2), -1);
1178    }
1179
1180    #[test]
1181    fn test_replace() {
1182        let s1 = SmtString::from("abcdef");
1183        let s2 = SmtString::from("cde");
1184        let s3 = SmtString::from("Z");
1185        let s4 = SmtString::from("VWXYZ");
1186
1187        assert_eq!(str_replace(&s1, &s2, &s3), SmtString::from("abZf"));
1188        assert_eq!(str_replace(&s1, &s2, &s4), SmtString::from("abVWXYZf"));
1189        assert_eq!(str_replace(&s1, &s2, &s2), s1);
1190        assert_eq!(str_replace(&s1, &s3, &s4), s1);
1191        assert_eq!(str_replace(&s1, &EMPTY, &s3), SmtString::from("Zabcdef"));
1192        assert_eq!(str_replace(&s4, &s3, &EMPTY), SmtString::from("VWXY"));
1193    }
1194
1195    #[test]
1196    fn test_replace_all() {
1197        let s1 = SmtString::from("abcdcdef");
1198        let s2 = SmtString::from("cd");
1199        let s3 = SmtString::from("Z");
1200        let s4 = SmtString::from("VWX");
1201        let s5 = SmtString::from("f");
1202
1203        assert_eq!(str_replace_all(&s1, &s2, &s3), "abZZef".into());
1204        assert_eq!(str_replace_all(&s1, &s2, &s4), "abVWXVWXef".into());
1205        assert_eq!(str_replace_all(&s1, &EMPTY, &s2), s1);
1206        assert_eq!(str_replace_all(&s1, &s3, &s4), s1);
1207        assert_eq!(str_replace_all(&s1, &s2, &EMPTY), "abef".into());
1208        assert_eq!(str_replace_all(&s1, &s5, &s2), "abcdcdecd".into());
1209    }
1210
1211    #[test]
1212    fn test_is_digit() {
1213        assert!(str_is_digit(&SmtString::from("0")));
1214        assert!(str_is_digit(&SmtString::from('5')));
1215        assert!(str_is_digit(&SmtString::from("9")));
1216        assert!(!str_is_digit(&SmtString::from("10")));
1217        assert!(!str_is_digit(&EMPTY));
1218        assert!(!str_is_digit(&SmtString::from("A")));
1219    }
1220
1221    #[test]
1222    fn test_code() {
1223        assert_eq!(str_to_code(&EMPTY), -1);
1224        assert_eq!(str_to_code(&SmtString::from(1202)), 1202);
1225        assert_eq!(str_to_code(&SmtString::from("abc")), -1);
1226
1227        assert_eq!(str_from_code(-19), EMPTY);
1228        assert_eq!(str_from_code(1202), SmtString::from(1202));
1229        assert_eq!(str_from_code(0x30000), EMPTY);
1230    }
1231
1232    #[test]
1233    fn test_int() {
1234        assert_eq!(str_to_int(&SmtString::from("00982")), 982);
1235        assert_eq!(str_to_int(&EMPTY), -1);
1236        assert_eq!(str_to_int(&SmtString::from("101aaabb")), -1);
1237
1238        assert_eq!(str_from_int(0), SmtString::from("0"));
1239        assert_eq!(str_from_int(-1), EMPTY);
1240        assert_eq!(str_from_int(1002), SmtString::from("1002"));
1241    }
1242}