use crate::error::JcsError;
use serde_json::{Map, Number, Value};
use std::collections::BTreeSet;
/// Zero-sized handle whose methods serialize a `serde_json::Value` into
/// canonical JSON text.
///
/// The type carries no state, so `Canonicalizer::new()`,
/// `Canonicalizer::default()` and clones are all interchangeable.
#[derive(Clone, Debug, Default)]
pub struct Canonicalizer;
impl Canonicalizer {
#[inline]
pub fn new() -> Self {
Self
}
pub fn canonicalize(&self, value: &Value) -> Result<String, JcsError> {
let mut out = String::new();
self.write_value(&mut out, value)?;
Ok(out)
}
pub fn canonicalize_bytes(&self, value: &Value) -> Result<Vec<u8>, JcsError> {
Ok(self.canonicalize(value)?.into_bytes())
}
fn write_value(&self, out: &mut String, value: &Value) -> Result<(), JcsError> {
match value {
Value::Null => {
out.push_str("null");
}
Value::Bool(b) => {
out.push_str(if *b { "true" } else { "false" });
}
Value::Number(n) => {
self.write_number(out, n);
}
Value::String(s) => {
self.write_string(out, s);
}
Value::Array(arr) => self.write_array(out, arr)?,
Value::Object(obj) => self.write_object(out, obj)?,
}
Ok(())
}
fn write_number(&self, out: &mut String, n: &Number) {
out.push_str(&n.to_string());
}
fn write_string(&self, out: &mut String, s: &str) {
out.push('"');
for c in s.chars() {
match c {
'"' => out.push_str("\\\""),
'\\' => out.push_str("\\\\"),
'\u{08}' => out.push_str("\\b"), '\u{0C}' => out.push_str("\\f"), '\n' => out.push_str("\\n"),
'\r' => out.push_str("\\r"),
'\t' => out.push_str("\\t"),
c if c.is_control() => {
out.push_str(&format!("\\u{:04x}", c as u32));
}
c => out.push(c),
}
}
out.push('"');
}
fn write_array(&self, out: &mut String, arr: &[Value]) -> Result<(), JcsError> {
out.push('[');
let mut first = true;
for v in arr {
if !first {
out.push(',');
}
first = false;
self.write_value(out, v)?;
}
out.push(']');
Ok(())
}
fn write_object(&self, out: &mut String, obj: &Map<String, Value>) -> Result<(), JcsError> {
out.push('{');
let mut seen_keys = BTreeSet::new();
let mut first = true;
for (key, value) in obj.iter() {
if !seen_keys.insert(key.clone()) {
return Err(JcsError::DuplicateKey { key: key.clone() });
}
if !first {
out.push(',');
}
first = false;
self.write_string(out, key);
out.push(':');
self.write_value(out, value)?;
}
out.push('}');
Ok(())
}
}
/// Parses `s` as JSON after scanning the raw text for repeated object keys.
///
/// The duplicate scan runs before parsing, so a repeated key is reported
/// even when the text would otherwise fail to parse.
///
/// # Errors
///
/// * `JcsError::DuplicateKey` when `find_duplicate_key` reports a key.
/// * `JcsError::InvalidJson` carrying the `serde_json` error message when
///   parsing fails.
pub fn parse_with_dup_check(s: &str) -> Result<Value, JcsError> {
    match find_duplicate_key(s) {
        Some(key) => Err(JcsError::DuplicateKey { key }),
        None => serde_json::from_str::<Value>(s).map_err(|err| JcsError::InvalidJson {
            reason: err.to_string(),
        }),
    }
}
/// Scans raw JSON text and returns the first member name that occurs twice
/// inside the same object, or `None` when every object has unique names.
///
/// Names are tracked per object: each `{` opens a fresh set of names and the
/// matching `}` discards it, so sibling objects (for example the elements of
/// an array) may reuse names freely. Names are compared after decoding JSON
/// escapes, so `"\u0061"` and `"a"` count as the same name. The returned
/// name is the decoded text without surrounding quotes.
///
/// The scan is lexical and does not validate the document; callers are
/// expected to run a real parser as well.
fn find_duplicate_key(s: &str) -> Option<String> {
    let bytes = s.as_bytes();
    let n = bytes.len();
    // One frame per open container: `Some(names)` for an object, `None` for
    // an array (arrays have no member names).
    let mut open: Vec<Option<std::collections::HashSet<String>>> = Vec::new();
    let mut i = 0;
    while i < n {
        match bytes[i] {
            b'"' => {
                let start = i + 1;
                let after = skip_string(bytes, start, n);
                i = after;
                if !is_key_at_depth(bytes, after, n) {
                    continue;
                }
                if let Some(Some(names)) = open.last_mut() {
                    // A ':' was found at or beyond `after`, so `after < n`,
                    // which means `skip_string` stopped on a real closing
                    // quote located at `after - 1`. Both slice bounds sit
                    // next to ASCII quotes and are valid char boundaries.
                    let raw = &s[start..after - 1];
                    let key = decode_json_key(raw).unwrap_or_else(|| raw.to_owned());
                    // `replace` hands back the stored name when one was
                    // already present, giving the lookup and the insert in
                    // a single step.
                    if let Some(duplicate) = names.replace(key) {
                        return Some(duplicate);
                    }
                }
            }
            b'{' => {
                open.push(Some(std::collections::HashSet::new()));
                i += 1;
            }
            b'[' => {
                open.push(None);
                i += 1;
            }
            b'}' | b']' => {
                // Popping an empty stack is a no-op, which tolerates
                // unbalanced input.
                open.pop();
                i += 1;
            }
            _ => i += 1,
        }
    }
    None
}

/// Decodes the JSON escape sequences in `raw`, the text found between the
/// quotes of a string literal.
///
/// Returns `None` when an escape is malformed or a `\u` surrogate is
/// unpaired.
fn decode_json_key(raw: &str) -> Option<String> {
    if !raw.contains('\\') {
        return Some(raw.to_owned());
    }
    let mut decoded = String::with_capacity(raw.len());
    let mut chars = raw.chars();
    while let Some(c) = chars.next() {
        if c != '\\' {
            decoded.push(c);
            continue;
        }
        match chars.next()? {
            '"' => decoded.push('"'),
            '\\' => decoded.push('\\'),
            '/' => decoded.push('/'),
            'b' => decoded.push('\u{08}'),
            'f' => decoded.push('\u{0C}'),
            'n' => decoded.push('\n'),
            'r' => decoded.push('\r'),
            't' => decoded.push('\t'),
            'u' => {
                let first = read_hex4(&mut chars)?;
                let scalar = if (0xD800..0xDC00).contains(&first) {
                    // High surrogate: a `\uDC00`..`\uDFFF` escape must follow.
                    if chars.next()? != '\\' || chars.next()? != 'u' {
                        return None;
                    }
                    let second = read_hex4(&mut chars)?;
                    if !(0xDC00..0xE000).contains(&second) {
                        return None;
                    }
                    0x10000 + ((first - 0xD800) << 10) + (second - 0xDC00)
                } else {
                    first
                };
                // A lone low surrogate is rejected here.
                decoded.push(char::from_u32(scalar)?);
            }
            _ => return None,
        }
    }
    Some(decoded)
}

/// Reads exactly four hexadecimal digits from `chars` and returns their
/// value, or `None` if fewer than four hex digits are available.
fn read_hex4(chars: &mut std::str::Chars<'_>) -> Option<u32> {
    let mut value = 0;
    for _ in 0..4 {
        value = value * 16 + chars.next()?.to_digit(16)?;
    }
    Some(value)
}

/// Given `i` just past an opening quote, returns the index just past the
/// matching closing quote, or `n` when the string is unterminated.
///
/// A backslash skips the following byte, so an escaped quote does not end
/// the string.
fn skip_string(bytes: &[u8], mut i: usize, n: usize) -> usize {
    while i < n {
        match bytes[i] {
            b'"' => return i + 1,
            b'\\' => i += 2,
            _ => i += 1,
        }
    }
    n
}

/// Returns `true` when the first byte at or after `pos` that is not JSON
/// whitespace is a `:`, i.e. the string that just ended is a member name.
fn is_key_at_depth(bytes: &[u8], pos: usize, n: usize) -> bool {
    let mut j = pos;
    while j < n && matches!(bytes[j], b' ' | b'\t' | b'\n' | b'\r') {
        j += 1;
    }
    j < n && bytes[j] == b':'
}
/// Walks an already-parsed `value` and reports a key that appears twice
/// within any object, recursing through nested objects and arrays.
///
/// All keys of an object are checked before its values are visited.
///
/// NOTE(review): a `serde_json::Map` appears to store each key at most once,
/// so duplicates present in the original text are presumably gone by the
/// time this runs and the error branch may be unreachable. Confirm, and
/// prefer the raw-text scan in `parse_with_dup_check` when duplicates in the
/// input must be rejected.
///
/// # Errors
///
/// Returns `JcsError::DuplicateKey` naming the first repeated key found.
fn detect_duplicates(value: &Value) -> Result<(), JcsError> {
    match value {
        Value::Object(members) => {
            let mut seen = BTreeSet::new();
            let repeated = members.keys().find(|name| !seen.insert((*name).clone()));
            if let Some(name) = repeated {
                return Err(JcsError::DuplicateKey { key: name.clone() });
            }
            members.values().try_for_each(detect_duplicates)
        }
        Value::Array(items) => items.iter().try_for_each(detect_duplicates),
        _ => Ok(()),
    }
}
/// Parses `input` as JSON and then runs `detect_duplicates` on the result.
///
/// NOTE(review): the duplicate check runs on the parsed value, not on the
/// text. If the parser collapses repeated keys (which a map-backed `Value`
/// would do), this function cannot reject them; `parse_with_dup_check`
/// scans the raw text instead. Confirm which behavior callers need.
///
/// # Errors
///
/// * `JcsError::ParseError` wrapping the `serde_json` error when parsing
///   fails.
/// * Any error returned by `detect_duplicates`.
pub fn parse_and_validate(input: &str) -> Result<Value, JcsError> {
    serde_json::from_str::<Value>(input)
        .map_err(JcsError::ParseError)
        .and_then(|parsed| detect_duplicates(&parsed).map(|()| parsed))
}
/// Convenience entry point: runs `detect_duplicates` on `value` and, if that
/// passes, canonicalizes it with a fresh `Canonicalizer`.
///
/// # Errors
///
/// Returns the first error from `detect_duplicates`, otherwise whatever
/// `Canonicalizer::canonicalize` returns.
pub fn canonicalize_flexible(value: &Value) -> Result<String, JcsError> {
    detect_duplicates(value).and_then(|()| Canonicalizer::new().canonicalize(value))
}
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    /// Canonicalizes `value` with a fresh `Canonicalizer`, panicking if
    /// canonicalization fails.
    fn canon(value: Value) -> String {
        Canonicalizer::new().canonicalize(&value).unwrap()
    }

    /// Returns `true` when `parse_with_dup_check` rejects `input` with
    /// `JcsError::DuplicateKey`.
    fn rejected_as_duplicate(input: &str) -> bool {
        matches!(
            parse_with_dup_check(input),
            Err(JcsError::DuplicateKey { .. })
        )
    }

    #[test]
    fn test_null() {
        assert_eq!(canon(json!(null)), "null");
    }

    #[test]
    fn test_bool() {
        assert_eq!(canon(json!(true)), "true");
        assert_eq!(canon(json!(false)), "false");
    }

    #[test]
    fn test_numbers() {
        assert_eq!(canon(json!(42)), "42");
        assert_eq!(canon(json!(-1)), "-1");
        assert_eq!(canon(json!(0.5)), "0.5");
    }

    #[test]
    fn test_string_basic() {
        assert_eq!(canon(json!("hello")), "\"hello\"");
    }

    #[test]
    fn test_string_escapes() {
        // Quote, backslash, short-form newline, and a \u-escaped NUL.
        assert_eq!(canon(json!("a\"b")), "\"a\\\"b\"");
        assert_eq!(canon(json!("a\\b")), "\"a\\\\b\"");
        assert_eq!(canon(json!("a\nb")), "\"a\\nb\"");
        assert_eq!(canon(json!("a\u{0}b")), "\"a\\u0000b\"");
    }

    #[test]
    fn test_object_sorted_keys() {
        let rendered = canon(json!({"b": 1, "a": 2, "c": 3}));
        assert_eq!(rendered, r#"{"a":2,"b":1,"c":3}"#);
    }

    #[test]
    fn test_nested_object() {
        // Object members are reordered; array elements keep their order.
        let rendered = canon(json!({
            "z": {"b": 1, "a": 2},
            "a": [3, 2, 1]
        }));
        assert_eq!(rendered, r#"{"a":[3,2,1],"z":{"a":2,"b":1}}"#);
    }

    #[test]
    fn test_duplicate_key_rejected() {
        assert!(rejected_as_duplicate(r#"{"a": 1, "a": 2}"#));
    }

    #[test]
    fn test_detect_duplicates_nested() {
        // Exercises the raw-text scan via `parse_with_dup_check`.
        assert!(rejected_as_duplicate(r#"{"outer": {"x": 1, "x": 2}}"#));
    }
}