cipherstash_core/
string.rs1use lazy_static::lazy_static;
2use num_bigint::{BigUint, ToBigUint};
3use regex::Regex;
4use thiserror::Error;
5
6#[derive(Error, Debug)]
7pub enum OrderiseStringError {
8 #[error("Can only order strings that are pure ASCII")]
9 NotAscii,
10}
11
12pub fn orderise_string(s: &str) -> Result<Vec<u64>, OrderiseStringError> {
13 if !s.is_ascii() {
14 return Err(OrderiseStringError::NotAscii);
15 }
16
17 lazy_static! {
18 static ref NON_ALPHANUMERIC_OR_SPACE_REGEX: Regex =
19 Regex::new("[^a-z0-9[:space:]]+").unwrap();
20 }
21
22 lazy_static! {
23 static ref SPACE_REGEX: Regex = Regex::new("[[:space:]]+").unwrap();
24 }
25
26 lazy_static! {
27 static ref DIGIT_REGEX: Regex = Regex::new("[0-9]").unwrap();
28 }
29
30 let mut s = s.to_lowercase();
39
40 s = NON_ALPHANUMERIC_OR_SPACE_REGEX
42 .replace_all(&s, "~")
43 .to_string();
44
45 s = SPACE_REGEX.replace_all(&s, "{").to_string();
47
48 s = DIGIT_REGEX.replace_all(&s, "|").to_string();
50
51 let mut n = s
55 .bytes()
56 .map(|c| BigUint::from(c - 96))
60 .fold(0.to_biguint().unwrap(), |i, c| (i << 5) + c);
63
64 n <<= 64 - (s.len() * 5) % 64;
69
70 let mut terms = Vec::new();
71
72 let two_pow_64 = 2.to_biguint().unwrap().pow(64);
73
74 while n > 0.to_biguint().unwrap() {
76 let term = (&n % &two_pow_64).try_into().unwrap();
79
80 terms.insert(0, term);
81
82 n >>= 64;
83 }
84
85 terms.truncate(6);
87
88 Ok(terms)
89}
90
91#[cfg(test)]
92mod tests {
93 use super::*;
94 use serde::Deserialize;
95 use std::{fs, path::Path};
96
97 #[test]
98 fn test_orderise_string_non_ascii() {
99 let result = orderise_string("JalapeƱo");
100
101 assert!(matches!(result, Err(OrderiseStringError::NotAscii)));
102
103 let message = result.err().unwrap().to_string();
104
105 assert_eq!(message, "Can only order strings that are pure ASCII")
106 }
107
108 #[test]
109 fn test_orderise_string_gives_same_output_as_ruby_clint() {
112 #[derive(Deserialize, Debug)]
113 struct TestCase {
114 input: String,
115 output: Vec<u64>,
116 }
117
118 let case_file_path =
119 Path::new(env!("CARGO_MANIFEST_DIR")).join("./orderise_string_test_cases.json");
120 let json_str = fs::read_to_string(case_file_path).expect("couldn't read test case file");
121 let test_cases: Vec<TestCase> =
122 serde_json::from_str(&json_str).expect("couldn't parse test cases");
123
124 for test_case in test_cases {
125 let result = orderise_string(&test_case.input);
126
127 assert!(
128 result.is_ok(),
129 "Expected orderise_string to succeed given {:?}, but got error: {:?}",
130 &test_case.input,
131 result
132 );
133
134 assert_eq!(
135 result.unwrap(),
136 test_case.output,
137 "\n orderise_string didn't match for input: {:?}",
138 test_case.input
139 );
140 }
141 }
142}