/// One format placeholder found in a string.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PlaceholderToken {
    /// Explicit argument position (the `N` of `%N$…`), or `None` when the
    /// placeholder carries no position.
    pub index: Option<usize>,
    /// Character identifying the placeholder type.
    pub kind: char,
}

impl PlaceholderToken {
    /// Renders the token as a compact signature string.
    ///
    /// The result is `"<index>$<kind>"` when an index is present and just
    /// `"<kind>"` otherwise.
    pub fn to_signature(&self) -> String {
        let mut rendered = String::new();
        if let Some(position) = self.index {
            rendered.push_str(&position.to_string());
            rendered.push('$');
        }
        rendered.push(self.kind);
        rendered
    }
}
/// Scans `input` for printf-style placeholders and returns them in order of
/// appearance.
///
/// Recognised shape: `%`, an optional `N$` position, an optional `l` or `ll`
/// length modifier, then a single ASCII letter or `@`. The stored kind is the
/// result of `canonical_kind_char` on that final character. A doubled `%%` is
/// skipped as an escape. Anything else after `%` is ignored and scanning
/// resumes at the following byte.
pub fn extract_placeholders(input: &str) -> Vec<PlaceholderToken> {
    let raw = input.as_bytes();
    let mut tokens = Vec::new();
    let mut pos = 0;

    while pos < raw.len() {
        if raw[pos] != b'%' {
            pos += 1;
            continue;
        }
        // `%%` is an escaped percent sign, not a placeholder.
        if raw.get(pos + 1) == Some(&b'%') {
            pos += 2;
            continue;
        }

        let digits_start = pos + 1;
        let digits_end = digits_start
            + raw[digits_start..]
                .iter()
                .take_while(|b| b.is_ascii_digit())
                .count();

        // Digits only count as a position when immediately followed by `$`;
        // otherwise the scan restarts right after the `%`.
        let (index, mut cursor) =
            if digits_end > digits_start && raw.get(digits_end) == Some(&b'$') {
                // A number too large for `usize` leaves the index unset, but
                // the `N$` prefix is still consumed.
                let parsed = std::str::from_utf8(&raw[digits_start..digits_end])
                    .ok()
                    .and_then(|digits| digits.parse::<usize>().ok());
                (parsed, digits_end + 1)
            } else {
                (None, digits_start)
            };

        // Skip at most two `l` length modifiers (`%ld`, `%lld`).
        for _ in 0..2 {
            if raw.get(cursor) != Some(&b'l') {
                break;
            }
            cursor += 1;
        }

        match raw.get(cursor).map(|&byte| byte as char) {
            Some(ch) if ch.is_ascii_alphabetic() || ch == '@' => {
                tokens.push(PlaceholderToken {
                    index,
                    kind: canonical_kind_char(ch),
                });
                pos = cursor + 1;
            }
            // Not a placeholder: step past the `%` and keep scanning.
            _ => pos += 1,
        }
    }

    tokens
}
/// Rewrites iOS-flavoured placeholders in `input` to their plain printf form.
///
/// Conversions performed, each with or without an `N$` positional prefix:
///
/// * `%@`  → `%s`
/// * `%ld` → `%d`
/// * `%lu` → `%u`
///
/// An escaped percent sign (`%%`) is copied through verbatim, so literal text
/// such as `%%@` or `%%ld` is never mistaken for a placeholder. Every other
/// byte sequence, including placeholders not listed above (for example
/// `%lld`), is left untouched.
pub fn normalize_placeholders(input: &str) -> String {
    let bytes = input.as_bytes();
    let mut out = String::with_capacity(input.len());
    let mut i = 0;

    while i < bytes.len() {
        if bytes[i] != b'%' {
            // Copy the whole run of ordinary text up to the next `%` at once.
            // `%` is ASCII, so both ends of the slice are char boundaries.
            let run_end = input[i..].find('%').map_or(input.len(), |off| i + off);
            out.push_str(&input[i..run_end]);
            i = run_end;
            continue;
        }

        // `%%` is a literal percent sign; whatever follows it is plain text.
        if bytes.get(i + 1).copied() == Some(b'%') {
            out.push_str("%%");
            i += 2;
            continue;
        }

        // Optional positional prefix: one or more digits followed by `$`.
        let digits_start = i + 1;
        let digits_end = digits_start
            + bytes[digits_start..]
                .iter()
                .take_while(|b| b.is_ascii_digit())
                .count();
        let spec_start =
            if digits_end > digits_start && bytes.get(digits_end).copied() == Some(b'$') {
                digits_end + 1
            } else {
                digits_start
            };

        // Map the specifier to its normalised kind and the bytes it occupies.
        let rewrite = match (
            bytes.get(spec_start).copied(),
            bytes.get(spec_start + 1).copied(),
        ) {
            (Some(b'@'), _) => Some(('s', 1)),
            (Some(b'l'), Some(b'd')) => Some(('d', 2)),
            (Some(b'l'), Some(b'u')) => Some(('u', 2)),
            _ => None,
        };

        match rewrite {
            Some((kind, consumed)) => {
                // Keep `%` and any `N$` prefix exactly as written.
                out.push_str(&input[i..spec_start]);
                out.push(kind);
                i = spec_start + consumed;
            }
            None => {
                // Unrecognised: emit the `%` and let the rest be copied as text.
                out.push('%');
                i += 1;
            }
        }
    }

    out
}
/// Converts string placeholders in `input` to the iOS object form.
///
/// `%s` becomes `%@` and `%N$s` becomes `%N$@`. Any run of `l` modifiers
/// directly before the `s` is dropped. `%%` is copied through unchanged, and
/// every other `%` sequence is left exactly as written.
pub fn to_ios_placeholders(input: &str) -> String {
    let raw = input.as_bytes();
    let mut converted = String::with_capacity(input.len());
    let mut pos = 0usize;

    while pos < raw.len() {
        // Ordinary text: copy one full character at a time.
        if raw[pos] != b'%' {
            let ch = input[pos..]
                .chars()
                .next()
                .expect("valid UTF-8 slicing while converting placeholders");
            converted.push(ch);
            pos += ch.len_utf8();
            continue;
        }

        // Escaped percent sign.
        if raw.get(pos + 1) == Some(&b'%') {
            converted.push_str("%%");
            pos += 2;
            continue;
        }

        // Optional `N$` prefix; digits not followed by `$` are not a position.
        let digits_start = pos + 1;
        let digits_end = digits_start
            + raw[digits_start..]
                .iter()
                .take_while(|b| b.is_ascii_digit())
                .count();
        let positional = digits_end > digits_start && raw.get(digits_end) == Some(&b'$');
        let after_prefix = if positional {
            digits_end + 1
        } else {
            digits_start
        };

        // Skip every `l` length modifier before the type character.
        let type_at = after_prefix
            + raw[after_prefix..]
                .iter()
                .take_while(|&&b| b == b'l')
                .count();

        converted.push('%');
        if raw.get(type_at) == Some(&b's') {
            if positional {
                converted.push_str(&input[digits_start..digits_end]);
                converted.push('$');
            }
            converted.push('@');
            pos = type_at + 1;
        } else {
            // Not a string placeholder: keep the `%`; the bytes after it are
            // copied as ordinary text on the following iterations.
            pos += 1;
        }
    }

    converted
}
/// Computes the placeholder signature of `input`.
///
/// The string is first passed through `normalize_placeholders`, then scanned
/// with `extract_placeholders`. Each resulting token is rendered with
/// `to_signature`, in order of appearance.
pub fn signature(input: &str) -> Vec<String> {
    let normalized = normalize_placeholders(input);
    let mut rendered = Vec::new();
    for token in extract_placeholders(&normalized) {
        rendered.push(token.to_signature());
    }
    rendered
}
/// Maps a placeholder type character to its canonical form: `@` becomes `s`,
/// and every other character is ASCII-lowercased.
fn canonical_kind_char(ch: char) -> char {
    if ch == '@' {
        's'
    } else {
        ch.to_ascii_lowercase()
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    // Mixed iOS and Android placeholders, plus non-ASCII surrounding text.
    #[test]
    fn test_extract_android_and_ios() {
        assert_eq!(
            signature("Hello %1$@, you have %2$d items and %s extra"),
            vec!["1$s", "2$d", "s"]
        );
        assert_eq!(signature("你好,%s"), vec!["s"]);
    }

    // Non-positional iOS placeholders are rewritten to their plain form.
    #[test]
    fn test_normalize_ios_simple() {
        let text = "Value: %@ and number %ld";
        let normalized = normalize_placeholders(text);
        assert!(normalized.contains("%s"));
        assert!(normalized.contains("%d"));
        assert_eq!(signature(text), vec!["s", "d"]);
    }

    // A positional object placeholder keeps its position.
    #[test]
    fn test_normalize_positional_object() {
        let normalized = normalize_placeholders("Hello %1$@");
        assert!(normalized.contains("%1$s"));
    }

    // `%%` is an escaped percent sign and yields no token.
    #[test]
    fn test_ignore_escaped_percent() {
        assert_eq!(signature("Discount: 50%% and value %d"), vec!["d"]);
    }
}