1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
use lazy_static::lazy_static;
use num_bigint::{BigUint, ToBigUint};
use regex::Regex;
use thiserror::Error;
#[derive(Error, Debug)]
pub enum OrderiseStringError {
#[error("Can only order strings that are pure ASCII")]
NotAscii,
}
pub fn orderise_string(s: &str) -> Result<Vec<u64>, OrderiseStringError> {
if !s.is_ascii() {
return Err(OrderiseStringError::NotAscii);
}
lazy_static! {
static ref NON_ALPHANUMERIC_OR_SPACE_REGEX: Regex =
Regex::new("[^a-z0-9[:space:]]+").unwrap();
}
lazy_static! {
static ref SPACE_REGEX: Regex = Regex::new("[[:space:]]+").unwrap();
}
lazy_static! {
static ref DIGIT_REGEX: Regex = Regex::new("[0-9]").unwrap();
}
let mut s = s.to_lowercase();
s = NON_ALPHANUMERIC_OR_SPACE_REGEX
.replace_all(&s, "~")
.to_string();
s = SPACE_REGEX.replace_all(&s, "{").to_string();
s = DIGIT_REGEX.replace_all(&s, "|").to_string();
let mut n = s
.bytes()
.map(|c| BigUint::from(c - 96))
.fold(0.to_biguint().unwrap(), |i, c| (i << 5) + c);
n <<= 64 - (s.len() * 5) % 64;
let mut terms = Vec::new();
let two_pow_64 = 2.to_biguint().unwrap().pow(64);
while n > 0.to_biguint().unwrap() {
let term = (&n % &two_pow_64).try_into().unwrap();
terms.insert(0, term);
n >>= 64;
}
terms.truncate(6);
Ok(terms)
}
#[cfg(test)]
mod tests {
use super::*;
use serde::Deserialize;
use std::{fs, path::Path};
#[test]
fn test_orderise_string_non_ascii() {
let result = orderise_string("Jalapeño");
assert!(matches!(result, Err(OrderiseStringError::NotAscii)));
let message = result.err().unwrap().to_string();
assert_eq!(message, "Can only order strings that are pure ASCII")
}
#[test]
fn test_orderise_string_gives_same_output_as_ruby_clint() {
#[derive(Deserialize, Debug)]
struct TestCase {
input: String,
output: Vec<u64>,
}
let case_file_path =
Path::new(env!("CARGO_MANIFEST_DIR")).join("./orderise_string_test_cases.json");
let json_str = fs::read_to_string(case_file_path).expect("couldn't read test case file");
let test_cases: Vec<TestCase> =
serde_json::from_str(&json_str).expect("couldn't parse test cases");
for test_case in test_cases {
let result = orderise_string(&test_case.input);
assert!(
result.is_ok(),
"Expected orderise_string to succeed given {:?}, but got error: {:?}",
&test_case.input,
result
);
assert_eq!(
result.unwrap(),
test_case.output,
"\n orderise_string didn't match for input: {:?}",
test_case.input
);
}
}
}