use regex::Regex;
use serde::ser::Serialize;
use serde_json::ser::{CharEscape, Formatter};
use std::io::Write;
use std::string::FromUtf8Error as Utf8Error;
/// Stateless `serde_json` formatter; its `Formatter` impl in this file overrides how
/// floats, character escapes, and string fragments are written.
struct JSONFormatter {}
#[derive(Debug)]
pub enum CanonicalJSONError {}
impl From<Utf8Error> for CanonicalJSONError {
fn from(err: Utf8Error) -> Self {
err.into()
}
}
impl From<serde_json::error::Error> for CanonicalJSONError {
fn from(err: serde_json::error::Error) -> Self {
err.into()
}
}
impl Formatter for JSONFormatter {
    /// Writes a float by delegating to `format_number`.
    fn write_f64<W: ?Sized>(&mut self, writer: &mut W, value: f64) -> Result<(), std::io::Error>
    where
        W: Write,
    {
        format_number(writer, value)
    }

    /// Writes the two-character backslash escape for `char_escape`, or a `\u00XX`
    /// escape for other ASCII control characters.
    ///
    /// Uses `write_all` throughout: `Write::write` may accept only part of the buffer,
    /// and ignoring its return value would silently truncate the output.
    fn write_char_escape<W: ?Sized>(
        &mut self,
        writer: &mut W,
        char_escape: CharEscape,
    ) -> Result<(), std::io::Error>
    where
        W: Write,
    {
        let escaped: &[u8] = match char_escape {
            CharEscape::Quote => b"\\\"",
            CharEscape::ReverseSolidus => b"\\\\",
            CharEscape::LineFeed => b"\\n",
            CharEscape::Tab => b"\\t",
            CharEscape::CarriageReturn => b"\\r",
            CharEscape::Solidus => b"\\/",
            CharEscape::Backspace => b"\\b",
            CharEscape::FormFeed => b"\\f",
            CharEscape::AsciiControl(number) => {
                static HEX_DIGITS: [u8; 16] = *b"0123456789abcdef";
                // The byte is rendered as two lowercase hex digits after a fixed `\u00`.
                let bytes = [
                    b'\\',
                    b'u',
                    b'0',
                    b'0',
                    HEX_DIGITS[(number >> 4) as usize],
                    HEX_DIGITS[(number & 0xF) as usize],
                ];
                return writer.write_all(&bytes);
            }
        };
        writer.write_all(escaped)
    }

    /// Writes a string fragment with non-ASCII characters turned into `\u` escapes.
    ///
    /// The fragment is first escaped with `str::escape_default`; the `\'` it produces
    /// for single quotes is reverted, and the remaining Rust-style `\u{…}` escapes are
    /// rewritten by `normalize_unicode`.
    fn write_string_fragment<W: ?Sized>(
        &mut self,
        writer: &mut W,
        fragment: &str,
    ) -> Result<(), std::io::Error>
    where
        W: Write,
    {
        let escaped = fragment
            .escape_default()
            .to_string()
            .replace(r#"\'"#, "'");
        normalize_unicode(writer, escaped)
    }
}
/// Formats `number` in exponent notation (`{:e}`), normalizes the exponent with
/// `normalize_number`, and writes the result to `writer`.
///
/// # Errors
///
/// Returns any I/O error reported by `writer`.
fn format_number<W: ?Sized>(writer: &mut W, number: f64) -> Result<(), std::io::Error>
where
    W: Write,
{
    let normalized = normalize_number(format!("{:e}", number));
    // `write_all` rather than `write`: a short write must not silently drop digits.
    writer.write_all(normalized.as_bytes())
}
/// Rewrites the exponent part of a formatted number: the marker becomes an uppercase
/// `E`, a `+` sign and leading zeros of the exponent are dropped, and a `-` sign is kept.
fn normalize_number(input: String) -> String {
    const EXPONENT_PATTERN: &str = "(?:E(?:[+]0*|(-|)0+)|e(?:[+]|(-|))0*)([0-9])";
    let exponent = Regex::new(EXPONENT_PATTERN).unwrap();
    exponent.replace_all(&input, "E$1$2$3").into_owned()
}
/// Writes `serialized_string` to `writer`, rewriting Rust-style `\u{…}` escapes as
/// JSON `\uXXXX` escapes.
///
/// * One to four hex digits are left-padded with zeros to four digits.
/// * Five or six hex digits naming a valid code point are written as one `\uXXXX`
///   escape per UTF-16 code unit, i.e. a surrogate pair for anything above U+FFFF.
/// * Anything else (`\u{}`, an unterminated brace, non-hex content, other backslash
///   escapes) is copied through unchanged.
///
/// A backslash always consumes the character after it, so an escaped backslash
/// followed by `u{…}` is not mistaken for a unicode escape.
///
/// # Errors
///
/// Returns any I/O error reported by `writer`.
fn normalize_unicode<W: ?Sized>(
    writer: &mut W,
    serialized_string: String,
) -> Result<(), std::io::Error>
where
    W: Write,
{
    let mut rest = serialized_string.as_str();
    while let Some(backslash) = rest.find('\\') {
        // Text before the backslash needs no rewriting.
        writer.write_all(rest[..backslash].as_bytes())?;
        let tail = &rest[backslash..];
        let consumed = match translate_braced_escape(tail) {
            Some((json_escape, consumed)) => {
                writer.write_all(json_escape.as_bytes())?;
                consumed
            }
            None => {
                // Copy the backslash together with the character it escapes (if any).
                let consumed = 1 + tail[1..].chars().next().map_or(0, char::len_utf8);
                writer.write_all(tail[..consumed].as_bytes())?;
                consumed
            }
        };
        rest = &tail[consumed..];
    }
    writer.write_all(rest.as_bytes())
}

/// Translates a well-formed `\u{…}` escape at the very start of `text` into its JSON
/// form, returning the replacement and the number of input bytes it covers.
fn translate_braced_escape(text: &str) -> Option<(String, usize)> {
    const OPEN: &str = "\\u{";
    if !text.starts_with(OPEN) {
        return None;
    }
    let body = &text[OPEN.len()..];
    let close = body.find('}')?;
    let digits = &body[..close];
    let well_formed = !digits.is_empty()
        && digits.len() <= 6
        && digits.bytes().all(|byte| byte.is_ascii_hexdigit());
    if !well_formed {
        return None;
    }
    let json_escape = if digits.len() <= 4 {
        // Fits in one escape: keep the digits as written and pad to four.
        format!("\\u{:0>4}", digits)
    } else {
        let code_point = u32::from_str_radix(digits, 16)
            .ok()
            .and_then(std::char::from_u32)?;
        let mut units = [0u16; 2];
        code_point
            .encode_utf16(&mut units)
            .iter()
            .map(|unit| format!("\\u{:04x}", unit))
            .collect::<String>()
    };
    Some((json_escape, OPEN.len() + close + 1))
}
/// Serializes `input` with `JSONFormatter` and returns the output as a `String`.
///
/// # Errors
///
/// Returns a `CanonicalJSONError` if serialization fails or if the serialized bytes
/// are not valid UTF-8.
pub fn to_string(input: &serde_json::Value) -> Result<String, CanonicalJSONError> {
    let mut serializer = serde_json::Serializer::with_formatter(Vec::new(), JSONFormatter {});
    input.serialize(&mut serializer)?;
    let bytes = serializer.into_inner();
    Ok(String::from_utf8(bytes)?)
}
// Unit tests for `to_string`: each case builds a value with `json!`, serializes it,
// and compares the output text with the expected string.
#[cfg(test)]
mod tests {
use super::to_string;
use serde_json::json;
// Serializes `json!($v)` with `to_string` and asserts that the output equals `$e`.
// Prints the serialized text first; panics if serialization returns an error.
macro_rules! test_canonical_json {
($v:tt, $e:expr) => {
match to_string(&json!($v)) {
Ok(serialized_string) => {
println!("serialized is {}", serialized_string);
assert_eq!(serialized_string, $e)
},
Err(error) => { panic!("error serializing input : {:?}", error) }
};
};
}
#[test]
fn test_to_string() {
// Null and non-finite floats are all expected to come out as `null`.
test_canonical_json!(null, "null");
test_canonical_json!((std::f64::NAN), "null");
test_canonical_json!((std::f64::INFINITY), "null");
test_canonical_json!((std::f64::NEG_INFINITY), "null");
// Booleans.
test_canonical_json!(true, "true");
test_canonical_json!(false, "false");
// Integers are expected unchanged; floats in exponent notation with an uppercase `E`.
test_canonical_json!(0, "0");
test_canonical_json!(123, "123");
test_canonical_json!((-123), "-123");
test_canonical_json!(23.1, "2.31E1");
test_canonical_json!(23, "23");
test_canonical_json!(1_f64, "1E0");
test_canonical_json!(0_f64, "0E0");
test_canonical_json!(23.0, "2.3E1");
test_canonical_json!((-23.0), "-2.3E1");
test_canonical_json!(2300, "2300");
test_canonical_json!(0.00099, "9.9E-4");
test_canonical_json!(0.000011, "1.1E-5");
test_canonical_json!(0.0000011, "1.1E-6");
test_canonical_json!(0.000001, "1E-6");
test_canonical_json!(5.6, "5.6E0");
test_canonical_json!(0.00000099, "9.9E-7");
test_canonical_json!(0.0000001, "1E-7");
test_canonical_json!(0.000000930258908, "9.30258908E-7");
test_canonical_json!(0.00000000000068272, "6.8272E-13");
test_canonical_json!((10.000_f64.powf(21.0)), "1E21");
test_canonical_json!((10.0_f64.powi(20)), "1E20");
test_canonical_json!((10.0_f64.powi(15) + 0.1), "1.0000000000000001E15");
test_canonical_json!((10.0_f64.powi(16) * 1.1), "1.1E16");
// Strings: quoting, backslash escapes, and non-ASCII characters as `\u` escapes.
test_canonical_json!("", r#""""#);
test_canonical_json!(
" Preserve single quotes'in string",
r#"" Preserve single quotes'in string""#
);
test_canonical_json!(" Escapes quotes \" ", r#"" Escapes quotes \" ""#);
test_canonical_json!("test", r#""test""#);
test_canonical_json!("This\\and this", r#""This\\and this""#);
test_canonical_json!("I ❤ testing", r#""I \u2764 testing""#);
test_canonical_json!("This is a sentence.\n", r#""This is a sentence.\n""#);
test_canonical_json!("This is a \t tab.", r#""This is a \t tab.""#);
test_canonical_json!(
"This is a \r carriage return char.",
r#""This is a \r carriage return char.""#
);
// Forward slashes are expected to stay unescaped.
test_canonical_json!("image/jpeg", r#""image/jpeg""#);
test_canonical_json!("image//jpeg", r#""image//jpeg""#);
// Exponent-looking text inside a string must not be rewritten.
test_canonical_json!("frequency at 10.0e+04", r#""frequency at 10.0e+04""#);
// Literal backslash-u-brace text in the input: only the backslash is escaped.
test_canonical_json!("I \\u{} testing", r#""I \\u{} testing""#);
test_canonical_json!("I \\u{1234 testing", r#""I \\u{1234 testing""#);
test_canonical_json!("I \\u{{12345}} testing", r#""I \\u{{12345}} testing""#);
// Objects: the expected output lists keys in sorted order, at every nesting level.
test_canonical_json!(
{
"a": {},
"b": "b"
},
r#"{"a":{},"b":"b"}"#
);
test_canonical_json!(
{
"a": "a",
"id": "1",
"b": "b"
},
r#"{"a":"a","b":"b","id":"1"}"#
);
test_canonical_json!(
{
"a": json!({
"b": "b",
"a": "a",
"c": json!({
"b": "b",
"a": "a",
"c": ["b", "a", "c"],
"d": json!({ "b": "b", "a": "a" }),
"id": "1",
"e": 1,
"f": [2, 3, 1],
"g": json!({
"2": 2,
"3": 3,
"1": json!({
"b": "b",
"a": "a",
"c": "c",
})
})
})
}),
"id": "1"
},
concat!(
r#"{"a":{"a":"a","b":"b","c":{"a":"a","b":"b","c":["b","a","c"],"#,
r#""d":{"a":"a","b":"b"},"e":1,"f":[2,3,1],"#,
r#""g":{"1":{"a":"a","b":"b","c":"c"},"2":2,"3":3},"id":"1"}},"id":"1"}"#
)
);
test_canonical_json!(
{
"b": vec!["two", "three"],
"a": vec!["zero", "one"]
},
r#"{"a":["zero","one"],"b":["two","three"]}"#
);
test_canonical_json!(
{
"b": { "d": "d", "c": "c" },
"a": { "b": "b", "a": "a" },
},
r#"{"a":{"a":"a","b":"b"},"b":{"c":"c","d":"d"}}"#
);
// Non-ASCII object keys are escaped the same way as string values.
test_canonical_json!({"é": "check"}, r#"{"\u00e9":"check"}"#);
test_canonical_json!(
{
"def": "bar",
"abc": json!(0.000000930258908),
"ghi": json!(1000000000000000000000.0_f64),
"rust": "❤",
"zoo": [
"zorilla",
"anteater"
]
},
r#"{"abc":9.30258908E-7,"def":"bar","ghi":1E21,"rust":"\u2764","zoo":["zorilla","anteater"]}"#
);
// Arrays: element order is expected to be preserved.
test_canonical_json!([], "[]");
test_canonical_json!((vec!["one", "two", "three"]), r#"["one","two","three"]"#);
test_canonical_json!((vec![json!({ "key": "✓" })]), r#"[{"key":"\u2713"}]"#);
test_canonical_json!((vec![json!({ "key": "ę" })]), r#"[{"key":"\u0119"}]"#);
test_canonical_json!((vec![json!({ "key": "é" })]), r#"[{"key":"\u00e9"}]"#);
test_canonical_json!(
(vec![
json!({ "foo": "bar", "last_modified": "12345", "id": "1" }),
json!({ "bar": "baz", "last_modified": "45678", "id": "2" }),
]),
r#"[{"foo":"bar","id":"1","last_modified":"12345"},{"bar":"baz","id":"2","last_modified":"45678"}]"#
);
// NOTE(review): this case is an exact duplicate of the one directly above.
test_canonical_json!(
(vec![
json!({ "foo": "bar", "last_modified": "12345", "id": "1" }),
json!({ "bar": "baz", "last_modified": "45678", "id": "2" }),
]),
r#"[{"foo":"bar","id":"1","last_modified":"12345"},{"bar":"baz","id":"2","last_modified":"45678"}]"#
);
}
}