#![allow(clippy::many_single_char_names)]
#![allow(clippy::manual_range_contains)]
use crate::matcher::{naive_search, SearchResult};
use std::slice::Iter;
use std::{char, cmp, fmt};
/// Maximal number of characters a string may hold; the internal constructor
/// panics when given a longer vector.
pub const MAX_LENGTH: i32 = i32::MAX;
/// Largest valid character code: a character is "good" when it is in `0..=MAX_CHAR`.
pub const MAX_CHAR: u32 = 0x2FFFF;
/// Code substituted for integers above `MAX_CHAR` by the integer-based constructors.
pub const REPLACEMENT_CHAR: u32 = 0xFFFD;
/// A string stored as a vector of `u32` character codes.
///
/// Equality, hashing and cloning are derived and therefore operate on the
/// underlying vector of codes.
#[derive(Debug, PartialEq, Eq, Clone, Hash)]
pub struct SmtString {
// Character codes, one `u32` per character of the string.
s: Vec<u32>,
}
/// The empty string (no characters).
pub const EMPTY: SmtString = SmtString { s: vec![] };
impl SmtString {
    /// Wraps a vector of character codes into a string.
    ///
    /// The codes themselves are not validated; only the length is checked.
    ///
    /// # Panics
    ///
    /// Panics if `a` holds more than [`MAX_LENGTH`] elements.
    fn make(a: Vec<u32>) -> SmtString {
        let n = a.len();
        if n > MAX_LENGTH as usize {
            panic!(
                "Cannot construct a string of length {}: max length is {}",
                n, MAX_LENGTH
            );
        }
        SmtString { s: a }
    }

    /// Builds a string from a copy of the slice `a` (same checks as `make`).
    fn make_from_slice(a: &[u32]) -> SmtString {
        Self::make(a.to_vec())
    }

    /// Checks that the string is well formed: its length does not exceed
    /// [`MAX_LENGTH`] and every character code is at most [`MAX_CHAR`].
    pub fn is_good(&self) -> bool {
        // The bound is inclusive: `make` accepts a length equal to MAX_LENGTH,
        // so such a string must not be reported as bad because of its length.
        self.s.len() <= MAX_LENGTH as usize && good_string(&self.s)
    }

    /// Checks whether every character code is a valid Unicode scalar value.
    pub fn is_unicode(&self) -> bool {
        all_unicode(&self.s)
    }

    /// Converts to a Rust `String`.
    ///
    /// Codes that are not valid Unicode scalar values are replaced by U+FFFD.
    pub fn to_unicode_string(&self) -> String {
        map_to_unicode(&self.s)
    }

    /// Number of characters in the string.
    pub fn len(&self) -> usize {
        self.s.len()
    }

    /// Returns `true` when the string has no characters.
    pub fn is_empty(&self) -> bool {
        self.s.is_empty()
    }

    /// Character code at index `i`.
    ///
    /// # Panics
    ///
    /// Panics if `i` is out of bounds.
    pub fn char(&self, i: usize) -> u32 {
        self.s[i]
    }

    /// Iterator over the character codes, in order.
    pub fn iter(&self) -> Iter<'_, u32> {
        self.s.iter()
    }
}
/// Gives read-only access to the character codes as a slice.
impl AsRef<[u32]> for SmtString {
    fn as_ref(&self) -> &[u32] {
        self.s.as_slice()
    }
}
/// Builds a string from a Rust string slice.
///
/// Every `char` of `x` is stored as its code point: escape sequences are not
/// interpreted and the code points are not range-checked.
impl From<&str> for SmtString {
    fn from(x: &str) -> Self {
        let codes: Vec<u32> = x.chars().map(u32::from).collect();
        SmtString::make(codes)
    }
}
impl From<String> for SmtString {
fn from(x: String) -> Self {
SmtString::from(x.as_str())
}
}
/// Builds a string from a slice of integers.
///
/// Any element larger than `MAX_CHAR` is replaced by `REPLACEMENT_CHAR`.
impl From<&[u32]> for SmtString {
    fn from(a: &[u32]) -> Self {
        let mut codes = Vec::with_capacity(a.len());
        for &code in a {
            let sanitized = if code > MAX_CHAR {
                REPLACEMENT_CHAR
            } else {
                code
            };
            codes.push(sanitized);
        }
        SmtString::make(codes)
    }
}
impl<const N: usize> From<&[u32; N]> for SmtString {
fn from(a: &[u32; N]) -> Self {
a[..].into()
}
}
/// Builds a string from an owned vector of integers.
///
/// The vector is reused as is when all its elements are at most `MAX_CHAR`;
/// otherwise it is converted like a slice, which replaces the offending elements.
impl From<Vec<u32>> for SmtString {
    fn from(a: Vec<u32>) -> Self {
        let has_bad_code = a.iter().any(|&code| code > MAX_CHAR);
        if has_bad_code {
            SmtString::from(a.as_slice())
        } else {
            SmtString::make(a)
        }
    }
}
/// Builds a single-character string from an integer.
///
/// An integer larger than `MAX_CHAR` is replaced by `REPLACEMENT_CHAR`.
impl From<u32> for SmtString {
    fn from(x: u32) -> Self {
        let code = if x > MAX_CHAR { REPLACEMENT_CHAR } else { x };
        SmtString::make(vec![code])
    }
}
/// Builds a single-character string from a Rust `char`.
///
/// The code point of `x` is stored as is, without any range check.
impl From<char> for SmtString {
    fn from(x: char) -> SmtString {
        let code = u32::from(x);
        SmtString::make(vec![code])
    }
}
// States of the automaton that recognizes `\u` escape sequences in literals.
#[derive(Debug, PartialEq, Eq)]
enum State {
// Not inside an escape sequence.
Init,
// A backslash has been read.
AfterSlash,
// `\u` has been read.
AfterSlashU,
// `\u` followed by at least one hexadecimal digit (form without braces).
AfterSlashUHex,
// `\u{` has been read, possibly followed by hexadecimal digits.
AfterSlashUBrace,
}
// Automaton used to parse a string literal one character at a time.
#[derive(Debug, PartialEq, Eq)]
struct ParsingAutomaton {
// Current state.
state: State,
// Character codes produced so far.
string_so_far: Vec<u32>,
// Characters of the escape sequence being read; they are copied verbatim
// to the output if the sequence turns out not to be a valid escape.
pending: [u32; 9],
// Number of characters currently stored in `pending`.
pending_idx: usize,
// Value of the hexadecimal digits read so far in the current escape sequence.
escape_code: u32,
}
/// Creates an automaton in its initial state: empty output, no pending
/// escape sequence.
fn new_automaton() -> ParsingAutomaton {
    let string_so_far = Vec::new();
    let pending = [0u32; 9];
    ParsingAutomaton {
        state: State::Init,
        string_so_far,
        pending,
        pending_idx: 0,
        escape_code: 0,
    }
}
impl ParsingAutomaton {
    /// Appends the code point of `x` to the output.
    fn push(&mut self, x: char) {
        self.string_so_far.push(u32::from(x));
    }

    /// Records `x` as part of the escape sequence currently being read.
    fn pending(&mut self, x: char) {
        let i = self.pending_idx;
        assert!(i < 9);
        self.pending[i] = u32::from(x);
        self.pending_idx = i + 1;
    }

    /// Processes `x` outside of any escape sequence: a backslash starts a new
    /// sequence, any other character goes straight to the output.
    fn consume(&mut self, x: char) {
        match x {
            '\\' => {
                self.pending(x);
                self.state = State::AfterSlash;
            }
            _ => self.push(x),
        }
    }

    /// Abandons the current escape sequence: the characters read so far are
    /// copied verbatim to the output and the automaton returns to `Init`.
    fn flush_pending(&mut self) {
        let count = self.pending_idx;
        self.string_so_far
            .extend_from_slice(&self.pending[..count]);
        self.state = State::Init;
        self.escape_code = 0;
        self.pending_idx = 0;
    }

    /// Completes the current escape sequence: the decoded code is appended to
    /// the output and the automaton returns to `Init`.
    fn close_escape_seq(&mut self) {
        let code = self.escape_code;
        self.string_so_far.push(code);
        self.state = State::Init;
        self.escape_code = 0;
        self.pending_idx = 0;
    }

    /// Adds the hexadecimal digit `x` to the code being decoded.
    /// Callers only pass ASCII hexadecimal digits, so the conversion cannot fail.
    fn add_hex(&mut self, x: char) {
        let digit = x.to_digit(16).unwrap();
        self.escape_code = (self.escape_code << 4) | digit;
        self.pending(x);
    }

    /// Feeds the next character of the literal to the automaton.
    fn accept(&mut self, x: char) {
        let is_hex = x.is_ascii_hexdigit();
        match self.state {
            State::Init => self.consume(x),
            State::AfterSlash if x == 'u' => {
                self.pending(x);
                self.state = State::AfterSlashU;
            }
            State::AfterSlashU if x == '{' => {
                self.pending(x);
                self.state = State::AfterSlashUBrace;
            }
            State::AfterSlashU if is_hex => {
                self.add_hex(x);
                self.state = State::AfterSlashUHex;
            }
            // `pending_idx > 3` means at least one digit follows `\u{`.
            State::AfterSlashUBrace
                if x == '}' && self.pending_idx > 3 && self.escape_code <= MAX_CHAR =>
            {
                self.close_escape_seq();
            }
            // At most five digits are accepted between the braces.
            State::AfterSlashUBrace if is_hex && self.pending_idx < 8 => self.add_hex(x),
            State::AfterSlashUHex if is_hex => {
                self.add_hex(x);
                // `\u` plus four digits: the sequence is complete.
                if self.pending_idx == 6 {
                    self.close_escape_seq();
                }
            }
            // Anything else breaks the escape sequence: emit what was read
            // literally, then process `x` as a fresh character.
            State::AfterSlash
            | State::AfterSlashU
            | State::AfterSlashUBrace
            | State::AfterSlashUHex => {
                self.flush_pending();
                self.consume(x);
            }
        }
    }
}
/// Parses a string literal, decoding its `\u` escape sequences.
///
/// Sequences of the form `\u` followed by four hexadecimal digits, or `\u{...}`
/// with one to five hexadecimal digits denoting a code of at most `MAX_CHAR`,
/// are replaced by the character they denote. Anything that is not a well-formed
/// escape sequence, including one left unfinished at the end of the input,
/// is copied verbatim.
pub fn parse_smt_literal(a: &str) -> SmtString {
    let mut automaton = new_automaton();
    a.chars().for_each(|c| automaton.accept(c));
    // Emit whatever is left of an unfinished escape sequence.
    automaton.flush_pending();
    SmtString::make(automaton.string_so_far)
}
/// Checks whether `x` is a valid character code, i.e. lies in `0..=MAX_CHAR`.
pub fn good_char(x: u32) -> bool {
    (0..=MAX_CHAR).contains(&x)
}
/// Checks whether every element of `a` is a valid character code
/// (at most `MAX_CHAR`). An empty slice is valid.
pub fn good_string(a: &[u32]) -> bool {
    !a.iter().any(|&code| code > MAX_CHAR)
}
/// Converts a character code to its textual form inside a string literal.
///
/// - a double quote is doubled,
/// - other printable ASCII characters (32 to 126) are kept as is,
/// - control characters (below 32, and 127) become `\u{XX}` with two hex digits,
/// - other codes below 0x10000 become `\uXXXX` with four hex digits,
/// - larger codes become `\u{X...}`.
pub fn smt_char_as_string(x: u32) -> String {
    match x {
        0x22 => String::from("\"\""),
        0x20..=0x7E => char::from_u32(x).unwrap().to_string(),
        0x00..=0x1F | 0x7F => format!("\\u{{{:02x}}}", x),
        0x80..=0xFFFF => format!("\\u{:04x}", x),
        _ => format!("\\u{{{:x}}}", x),
    }
}
/// Formats the string as a double-quoted literal.
///
/// A double quote inside the string is doubled, printable ASCII characters are
/// written as is, and every other character is written as a `\u` escape sequence.
impl fmt::Display for SmtString {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str("\"")?;
        for &code in &self.s {
            match code {
                0x22 => f.write_str("\"\"")?,
                0x20..=0x7E => write!(f, "{}", char::from_u32(code).unwrap())?,
                0x00..=0x1F | 0x7F => write!(f, "\\u{{{:02x}}}", code)?,
                0x80..=0xFFFF => write!(f, "\\u{:04x}", code)?,
                _ => write!(f, "\\u{{{:x}}}", code)?,
            }
        }
        f.write_str("\"")
    }
}
/// Converts a character code to the text used for it inside a string literal.
///
/// A double quote gives `""`, printable ASCII characters are returned unchanged,
/// and all other codes are returned as `\u` escape sequences: `\u{XX}` for control
/// characters, `\uXXXX` for other codes below 0x10000, `\u{X...}` above.
pub fn char_to_smt(x: u32) -> String {
    const QUOTE: u32 = '"' as u32;
    const DEL: u32 = 127;

    if x == QUOTE {
        return "\"\"".to_owned();
    }
    if (32..DEL).contains(&x) {
        return char::from_u32(x).unwrap().to_string();
    }
    // Printable characters are gone: what is left at or below DEL is a control character.
    if x <= DEL {
        return format!("\\u{{{:02x}}}", x);
    }
    if x < 0x10000 {
        format!("\\u{:04x}", x)
    } else {
        format!("\\u{{{:x}}}", x)
    }
}
/// Lowercase hexadecimal digit for the four low-order bits of `i`.
#[allow(dead_code)]
fn hex(i: u32) -> char {
    const DIGITS: &[u8; 16] = b"0123456789abcdef";
    let nibble = (i & 0xF) as usize;
    char::from(DIGITS[nibble])
}
/// Appends the literal form of character code `x` to `s` and returns the result.
///
/// The text appended is the same as the one produced by `char_to_smt`:
/// a double quote is doubled, printable ASCII is copied, control characters
/// (below 32, and 127) give `\u{XX}`, other codes below 0x10000 give `\uXXXX`,
/// and larger codes give `\u{X...}` with as many digits as needed.
#[allow(dead_code)]
fn append_smt_char(mut s: String, x: u32) -> String {
    match x {
        0x22 => s.push_str("\"\""),
        0x20..=0x7E => s.push(char::from_u32(x).unwrap()),
        // 127 (DEL) is grouped with the control characters, as in `char_to_smt`.
        0x00..=0x1F | 0x7F => s.push_str(&format!("\\u{{{:02x}}}", x)),
        0x80..=0xFFFF => s.push_str(&format!("\\u{:04x}", x)),
        // `{:x}` prints every digit, so codes above 0xFFFFF are not truncated.
        _ => s.push_str(&format!("\\u{{{:x}}}", x)),
    }
    s
}
/// Checks whether every element of `v` is a valid Unicode scalar value.
fn all_unicode(v: &[u32]) -> bool {
    !v.iter().any(|&code| char::from_u32(code).is_none())
}
/// Converts a slice of codes to a `String`; codes that are not valid Unicode
/// scalar values are replaced by U+FFFD.
fn map_to_unicode(v: &[u32]) -> String {
    let mut text = String::with_capacity(v.len());
    for &code in v {
        let c = match char::from_u32(code) {
            Some(c) => c,
            None => char::REPLACEMENT_CHARACTER,
        };
        text.push(c);
    }
    text
}
/// Checks whether `x` is the code of an ASCII decimal digit (`'0'` to `'9'`).
fn char_is_digit(x: u32) -> bool {
    ('0' as u32..='9' as u32).contains(&x)
}
/// Strict lexicographic ordering: `true` when `v` comes strictly before `w`.
///
/// A proper prefix is smaller than any slice it is a prefix of.
fn vector_lt(v: &[u32], w: &[u32]) -> bool {
    // Slices are compared lexicographically by the standard library.
    v < w
}
/// Lexicographic ordering: `true` when `v` comes before `w` or is equal to it.
///
/// A prefix is smaller than or equal to any slice it is a prefix of.
fn vector_le(v: &[u32], w: &[u32]) -> bool {
    // Slices are compared lexicographically by the standard library.
    v <= w
}
/// Checks whether `v` is a prefix of `w`.
///
/// The empty slice is a prefix of every slice, and a slice is a prefix of itself.
fn vector_prefix(v: &[u32], w: &[u32]) -> bool {
    w.starts_with(v)
}
/// Checks whether `v` is a suffix of `w`.
///
/// The empty slice is a suffix of every slice, and a slice is a suffix of itself.
fn vector_suffix(v: &[u32], w: &[u32]) -> bool {
    w.ends_with(v)
}
/// Returns a new vector holding the elements of `v` followed by those of `w`.
fn vector_concat(v: &[u32], w: &[u32]) -> Vec<u32> {
    // `concat` allocates the result once, with room for both slices.
    [v, w].concat()
}
// Searches for `v` in `w` from index `i` by delegating to `naive_search`.
// NOTE(review): the callers in this file pass the pattern as `v` and the searched
// string as `w`, and use `Found(a, b)` as the bounds of the match in `w` —
// confirm the exact contract against the `matcher` module.
fn find_sub_vector(v: &[u32], w: &[u32], i: usize) -> SearchResult {
naive_search(v, w, i)
}
/// Concatenation of `s1` and `s2`.
///
/// # Panics
///
/// Panics if the result would be longer than `MAX_LENGTH`.
pub fn str_concat(s1: &SmtString, s2: &SmtString) -> SmtString {
    let joined = vector_concat(s1.s.as_slice(), s2.s.as_slice());
    SmtString::make(joined)
}
/// Length of `s`, as a signed integer.
pub fn str_len(s: &SmtString) -> i32 {
    let length = s.len();
    length as i32
}
/// Character of `s` at index `i`, as a one-character string.
///
/// Returns the empty string when `i` is negative or not smaller than the length of `s`.
pub fn str_at(s: &SmtString, i: i32) -> SmtString {
    let in_range = i >= 0 && i < s.len() as i32;
    if !in_range {
        return EMPTY;
    }
    SmtString::from(s.s[i as usize])
}
/// Substring of `s` that starts at index `i` and has length at most `n`.
///
/// The result is cut at the end of `s` when fewer than `n` characters are available.
/// Returns the empty string when `i` is negative, `i` is not smaller than the length
/// of `s`, or `n` is not positive.
pub fn str_substr(s: &SmtString, i: i32, n: i32) -> SmtString {
    let start_out_of_range = i < 0 || i >= s.len() as i32;
    if start_out_of_range || n <= 0 {
        return EMPTY;
    }
    let start = i as usize;
    let end = cmp::min(start + n as usize, s.s.len());
    SmtString::make_from_slice(&s.s[start..end])
}
/// Strict lexicographic comparison: `true` when `s1` comes strictly before `s2`.
pub fn str_lt(s1: &SmtString, s2: &SmtString) -> bool {
    vector_lt(s1.s.as_slice(), s2.s.as_slice())
}
/// Lexicographic comparison: `true` when `s1` comes before `s2` or is equal to it.
pub fn str_le(s1: &SmtString, s2: &SmtString) -> bool {
    vector_le(s1.s.as_slice(), s2.s.as_slice())
}
/// Checks whether `s1` is a prefix of `s2`.
pub fn str_prefixof(s1: &SmtString, s2: &SmtString) -> bool {
    vector_prefix(s1.s.as_slice(), s2.s.as_slice())
}
/// Checks whether `s1` is a suffix of `s2`.
pub fn str_suffixof(s1: &SmtString, s2: &SmtString) -> bool {
    vector_suffix(s1.s.as_slice(), s2.s.as_slice())
}
/// Checks whether `s2` occurs in `s1`.
pub fn str_contains(s1: &SmtString, s2: &SmtString) -> bool {
    let outcome = find_sub_vector(&s2.s, &s1.s, 0);
    matches!(outcome, SearchResult::Found(..))
}
/// Index of the first occurrence of `s2` in `s1` at or after position `i`.
///
/// - Returns -1 when `i` is negative or larger than the length of `s1`.
/// - Returns -1 when `s2` does not occur in `s1` at or after position `i`.
/// - When `s2` is empty, the result is `i` for every `i` between 0 and the
///   length of `s1`, both included, as required by the SMT-LIB theory of strings.
pub fn str_indexof(s1: &SmtString, s2: &SmtString, i: i32) -> i32 {
    if i < 0 {
        return -1;
    }
    let start = i as usize;
    let len = s1.len();
    if start > len {
        return -1;
    }
    if start == len {
        // Only the empty string occurs at the very end of `s1`.
        return if s2.is_empty() { i } else { -1 };
    }
    match find_sub_vector(&s2.s, &s1.s, start) {
        SearchResult::NotFound => -1,
        SearchResult::Found(k, _) => k as i32,
    }
}
/// Replaces the first occurrence of `p` in `s` by `r`.
///
/// Returns a copy of `s` when `p` does not occur in it.
pub fn str_replace(s: &SmtString, p: &SmtString, r: &SmtString) -> SmtString {
    let text = s.s.as_slice();
    if let SearchResult::Found(start, end) = find_sub_vector(&p.s, text, 0) {
        // Text before the match, then the replacement, then the text after the match.
        let replaced = [&text[..start], r.s.as_slice(), &text[end..]].concat();
        SmtString::make(replaced)
    } else {
        SmtString::make_from_slice(text)
    }
}
/// Replaces every occurrence of `p` in `s` by `r`.
///
/// Occurrences are searched from left to right, each search resuming right after
/// the previous match. Returns a copy of `s` when `p` is empty.
pub fn str_replace_all(s: &SmtString, p: &SmtString, r: &SmtString) -> SmtString {
    if p.is_empty() {
        return SmtString::make_from_slice(&s.s);
    }
    let (text, pattern, replacement) = (&s.s, &p.s, &r.s);
    let mut out = Vec::new();
    let mut pos = 0;
    loop {
        match find_sub_vector(pattern, text, pos) {
            SearchResult::Found(start, end) => {
                out.extend_from_slice(&text[pos..start]);
                out.extend_from_slice(replacement);
                pos = end;
            }
            SearchResult::NotFound => break,
        }
    }
    // Whatever follows the last match is copied unchanged.
    out.extend_from_slice(&text[pos..]);
    SmtString::make(out)
}
/// Checks whether `s` consists of exactly one character and that character
/// is a decimal digit between `'0'` and `'9'`.
pub fn str_is_digit(s: &SmtString) -> bool {
    matches!(s.s.as_slice(), [d] if char_is_digit(*d))
}
/// Code of the single character of `s`, or -1 when `s` does not have
/// exactly one character.
pub fn str_to_code(s: &SmtString) -> i32 {
    match s.s.as_slice() {
        [code] => *code as i32,
        _ => -1,
    }
}
/// One-character string for the code `x`, or the empty string when `x`
/// is negative or larger than `MAX_CHAR`.
pub fn str_from_code(x: i32) -> SmtString {
    if x < 0 || x > MAX_CHAR as i32 {
        return EMPTY;
    }
    SmtString::from(x as u32)
}
/// Converts a string of decimal digits to the integer it denotes.
///
/// Leading zeros are accepted. Returns -1 when `s` is empty or contains
/// a character that is not a digit between `'0'` and `'9'`.
///
/// # Panics
///
/// Panics if the number denoted by the digits read so far does not fit in an `i32`.
pub fn str_to_int(s: &SmtString) -> i32 {
    if s.is_empty() {
        return -1;
    }
    let mut x: i32 = 0;
    for &d in &s.s {
        if !char_is_digit(d) {
            return -1;
        }
        let digit = (d - '0' as u32) as i32;
        // Checked arithmetic catches every overflow. Comparing the new value with
        // the previous one is not enough: a wrapped result can still be larger.
        x = x
            .checked_mul(10)
            .and_then(|shifted| shifted.checked_add(digit))
            .expect("Arithmetic overflow in str_to_int");
    }
    x
}
/// Decimal representation of `x` as a string, or the empty string when `x` is negative.
pub fn str_from_int(x: i32) -> SmtString {
    if x < 0 {
        return EMPTY;
    }
    let digits: String = x.to_string();
    SmtString::from(digits)
}
// Unit tests for the constructors, the literal parser, the formatter and
// the string operations defined in this file.
#[cfg(test)]
mod tests {
use super::*;
// Constructors: integers above MAX_CHAR are replaced by 0xFFFD.
#[test]
fn test_constructors() {
assert_eq!(EMPTY, "".into());
assert_eq!(SmtString::from("ABCD"), SmtString::from(&[65, 66, 67, 68]));
assert_eq!(
SmtString::from("AB\u{12ff}D"),
SmtString::from(&[65, 66, 0x12FF, 68])
);
assert_eq!(SmtString::from(0x1300), SmtString::from(&[0x1300u32]));
assert_eq!(SmtString::from(0x30000), SmtString::from(&[0xFFFD]));
assert_eq!(SmtString::from('K'), SmtString::from(&[75]));
assert_eq!(
SmtString::from(&[0x0, 0x100, 0x2FFFF, 0x30000, 0x40000]),
SmtString::from(&[0, 256, 0x2FFFF, 0xFFFD, 0xFFFD])
);
}
// Literal parsing: well-formed escape sequences are decoded, malformed ones
// are kept verbatim.
#[test]
fn test_parsing() {
assert_eq!(parse_smt_literal(""), EMPTY);
assert_eq!(
parse_smt_literal("A\"BB"),
SmtString::from(&[65, 34, 66, 66])
);
assert_eq!(
parse_smt_literal("abcd"),
SmtString::from(&[97, 98, 99, 100])
);
assert_eq!(parse_smt_literal(r"\u{1aB6e}"), SmtString::from(0x1AB6E));
// Fewer than four digits without braces: not an escape sequence.
assert_eq!(
parse_smt_literal(r"\u2CA"),
SmtString::from(&[92, 117, 50, 67, 65])
);
// 'G' is not a hexadecimal digit.
assert_eq!(
parse_smt_literal(r"\u{ACG}A"),
SmtString::from(&[92, 117, 123, 65, 67, 71, 125, 65])
);
// At least one digit is required between the braces.
assert_eq!(
parse_smt_literal(r"\u{}"),
SmtString::from(&[92, 117, 123, 125])
);
// 0x3ffff is larger than MAX_CHAR.
assert_eq!(
parse_smt_literal(r"\u{3ffff}"),
SmtString::from(&[92, 117, 123, 51, 102, 102, 102, 102, 125])
);
// A backslash that interrupts an escape sequence starts a new one.
assert_eq!(
parse_smt_literal(r"\u{\u{09}"),
SmtString::from(&[92, 117, 123, 9])
);
assert_eq!(
parse_smt_literal(r"\u\u{09}"),
SmtString::from(&[92, 117, 9])
);
assert_eq!(parse_smt_literal(r"\\u{09}"), SmtString::from(&[92, 9]));
}
// Display: quotes are doubled and non-printable characters are escaped.
#[test]
fn test_format() {
assert_eq!(EMPTY.to_string(), r#""""#);
assert_eq!(SmtString::from(&[65, 34, 66]).to_string(), r#""A""B""#);
assert_eq!(SmtString::from("abcd").to_string(), r#""abcd""#);
assert_eq!(
parse_smt_literal(r"\u{1aB6e}").to_string(),
r#""\u{1ab6e}""#
);
assert_eq!(parse_smt_literal(r"\u{12DD}").to_string(), r#""\u12dd""#);
assert_eq!(SmtString::from(0).to_string(), r#""\u{00}""#);
}
// Concatenation, including with the empty string.
#[test]
fn test_concat() {
let s1 = SmtString::from("abcd");
let s2 = SmtString::from("efg");
assert_eq!(str_concat(&s1, &s2), SmtString::from("abcdefg"));
assert_eq!(str_concat(&s1, &EMPTY), s1);
assert_eq!(str_concat(&EMPTY, &s2), s2);
assert_eq!(str_concat(&EMPTY, &EMPTY), EMPTY);
}
// Length is counted in characters, not in bytes.
#[test]
fn test_length() {
let s1 = SmtString::from("abcd");
let s2 = SmtString::from("\u{01dd}");
assert_eq!(str_len(&s1), 4);
assert_eq!(str_len(&s2), 1);
assert_eq!(str_len(&EMPTY), 0);
}
// Character access: out-of-range indices give the empty string.
#[test]
fn test_at() {
let s = SmtString::from("abcde");
assert_eq!(str_at(&s, 0), SmtString::from('a'));
assert_eq!(str_at(&s, 2), SmtString::from('c'));
assert_eq!(str_at(&s, 4), SmtString::from('e'));
assert_eq!(str_at(&s, 5), EMPTY);
assert_eq!(str_at(&s, -1), EMPTY);
assert_eq!(str_at(&EMPTY, 0), EMPTY);
}
// Substring extraction, including truncation at the end of the string.
#[test]
fn test_substr() {
let s = SmtString::from("abcdef");
assert_eq!(str_substr(&s, 2, 3), SmtString::from("cde"));
assert_eq!(str_substr(&s, 0, str_len(&s)), s);
assert_eq!(str_substr(&s, 2, 10), SmtString::from("cdef"));
assert_eq!(str_substr(&s, 2, 0), EMPTY);
assert_eq!(str_substr(&s, 6, 4), EMPTY);
}
// Lexicographic ordering, strict and non-strict.
#[test]
fn test_lexorder() {
let s1 = SmtString::from("abcdef");
let s2 = SmtString::from("abcd");
let s3 = SmtString::from("bbb");
assert!(str_lt(&s2, &s1));
assert!(str_lt(&s1, &s3));
assert!(str_lt(&EMPTY, &s3));
assert!(!str_lt(&s1, &s2));
assert!(!str_lt(&s2, &s2));
assert!(!str_lt(&s2, &EMPTY));
assert!(!str_lt(&EMPTY, &EMPTY));
assert!(str_le(&s2, &s1));
assert!(str_le(&s1, &s3));
assert!(str_le(&s2, &s2));
assert!(str_le(&EMPTY, &s3));
assert!(str_le(&EMPTY, &EMPTY));
assert!(!str_le(&s1, &s2));
assert!(!str_le(&s2, &EMPTY));
}
// Prefix, suffix and containment checks.
#[test]
fn test_substrings() {
let s1 = SmtString::from("abcdef");
let s2 = SmtString::from("abcd");
let s3 = SmtString::from("bbb");
let s4 = SmtString::from("def");
let s5 = SmtString::from("bc");
assert!(str_prefixof(&s2, &s1));
assert!(str_prefixof(&s1, &s1));
assert!(str_prefixof(&EMPTY, &s3));
assert!(str_prefixof(&EMPTY, &EMPTY));
assert!(!str_prefixof(&s1, &s2));
assert!(!str_prefixof(&s3, &s1));
assert!(!str_prefixof(&s1, &EMPTY));
assert!(!str_prefixof(&s5, &s1));
assert!(str_suffixof(&s4, &s1));
assert!(str_suffixof(&s1, &s1));
assert!(str_suffixof(&EMPTY, &s3));
assert!(str_suffixof(&EMPTY, &EMPTY));
assert!(!str_suffixof(&s1, &s2));
assert!(!str_suffixof(&s3, &s1));
assert!(!str_suffixof(&s1, &EMPTY));
assert!(!str_suffixof(&s5, &s1));
assert!(str_contains(&s1, &s2));
assert!(str_contains(&s1, &s1));
assert!(str_contains(&s1, &s4));
assert!(str_contains(&s1, &s5));
assert!(str_contains(&s3, &EMPTY));
assert!(str_contains(&EMPTY, &EMPTY));
assert!(!str_contains(&s2, &s1));
assert!(!str_contains(&s1, &s3));
assert!(!str_contains(&EMPTY, &s1));
}
// Search from a given start index.
#[test]
fn test_indexof() {
let s1 = SmtString::from("abcdef");
let s2 = SmtString::from("cde");
let s3 = SmtString::from("cdd");
assert_eq!(str_indexof(&s1, &s2, 0), 2);
assert_eq!(str_indexof(&s1, &s2, 2), 2);
assert_eq!(str_indexof(&s1, &s2, 3), -1);
assert_eq!(str_indexof(&s1, &s1, 0), 0);
assert_eq!(str_indexof(&s1, &EMPTY, 4), 4);
assert_eq!(str_indexof(&s1, &s3, 0), -1);
assert_eq!(str_indexof(&s1, &s2, -10), -1);
assert_eq!(str_indexof(&s1, &s1, 1), -1);
assert_eq!(str_indexof(&EMPTY, &s1, 2), -1);
}
// Replacement of the first occurrence; an empty pattern matches at index 0.
#[test]
fn test_replace() {
let s1 = SmtString::from("abcdef");
let s2 = SmtString::from("cde");
let s3 = SmtString::from("Z");
let s4 = SmtString::from("VWXYZ");
assert_eq!(str_replace(&s1, &s2, &s3), SmtString::from("abZf"));
assert_eq!(str_replace(&s1, &s2, &s4), SmtString::from("abVWXYZf"));
assert_eq!(str_replace(&s1, &s2, &s2), s1);
assert_eq!(str_replace(&s1, &s3, &s4), s1);
assert_eq!(str_replace(&s1, &EMPTY, &s3), SmtString::from("Zabcdef"));
assert_eq!(str_replace(&s4, &s3, &EMPTY), SmtString::from("VWXY"));
}
// Replacement of all occurrences; an empty pattern leaves the string unchanged.
#[test]
fn test_replace_all() {
let s1 = SmtString::from("abcdcdef");
let s2 = SmtString::from("cd");
let s3 = SmtString::from("Z");
let s4 = SmtString::from("VWX");
let s5 = SmtString::from("f");
assert_eq!(str_replace_all(&s1, &s2, &s3), "abZZef".into());
assert_eq!(str_replace_all(&s1, &s2, &s4), "abVWXVWXef".into());
assert_eq!(str_replace_all(&s1, &EMPTY, &s2), s1);
assert_eq!(str_replace_all(&s1, &s3, &s4), s1);
assert_eq!(str_replace_all(&s1, &s2, &EMPTY), "abef".into());
assert_eq!(str_replace_all(&s1, &s5, &s2), "abcdcdecd".into());
}
// Only one-character strings holding a decimal digit are digits.
#[test]
fn test_is_digit() {
assert!(str_is_digit(&SmtString::from("0")));
assert!(str_is_digit(&SmtString::from('5')));
assert!(str_is_digit(&SmtString::from("9")));
assert!(!str_is_digit(&SmtString::from("10")));
assert!(!str_is_digit(&EMPTY));
assert!(!str_is_digit(&SmtString::from("A")));
}
// Conversions between one-character strings and character codes.
#[test]
fn test_code() {
assert_eq!(str_to_code(&EMPTY), -1);
assert_eq!(str_to_code(&SmtString::from(1202)), 1202);
assert_eq!(str_to_code(&SmtString::from("abc")), -1);
assert_eq!(str_from_code(-19), EMPTY);
assert_eq!(str_from_code(1202), SmtString::from(1202));
assert_eq!(str_from_code(0x30000), EMPTY);
}
// Conversions between digit strings and non-negative integers.
#[test]
fn test_int() {
assert_eq!(str_to_int(&SmtString::from("00982")), 982);
assert_eq!(str_to_int(&EMPTY), -1);
assert_eq!(str_to_int(&SmtString::from("101aaabb")), -1);
assert_eq!(str_from_int(0), SmtString::from("0"));
assert_eq!(str_from_int(-1), EMPTY);
assert_eq!(str_from_int(1002), SmtString::from("1002"));
}
}