llm-message-hash 0.1.0

Stable canonical hash of LLM request/message structures. Recursive key-sorting JSON canonicalization + sha256, with per-provider ignore-lists so semantically-equal Anthropic/OpenAI/Bedrock requests produce the same hash. Useful for cache keys and idempotency.
Documentation
//! Canonical JSON emitter.
//!
//! Walks a `serde_json::Value` and writes a deterministic byte sequence:
//!   - object keys sorted by Unicode code point (lexicographic on bytes
//!     since serde_json keys are valid UTF-8)
//!   - ignored field names dropped
//!   - numbers emitted via serde_json's `Display` impl (deterministic for
//!     integers; for floats we accept whatever serde_json produces, because
//!     inputs are expected to be JSON values that already survived a round trip)
//!   - strings emitted with serde_json's standard escaping
//!   - no whitespace
//!
//! The output is valid compact JSON. You can re-parse it for debugging.

use std::io::Write;

use serde_json::Value;

use crate::opts::HashOpts;

/// Write the canonical JSON representation of `v` to `out`, honoring `opts`.
pub fn write_canonical<W: Write>(out: &mut W, v: &Value, opts: &HashOpts) -> std::io::Result<()> {
    match v {
        Value::Null => out.write_all(b"null"),
        Value::Bool(b) => out.write_all(if *b { b"true" } else { b"false" }),
        Value::Number(n) => {
            // serde_json's Display impl is deterministic for both integers
            // and the f64s it can losslessly round-trip.
            out.write_all(n.to_string().as_bytes())
        }
        Value::String(s) => {
            let escaped = serde_json::to_string(s).expect("string serialization is infallible");
            out.write_all(escaped.as_bytes())
        }
        Value::Array(items) => {
            out.write_all(b"[")?;
            let mut first = true;
            for item in items {
                if !first {
                    out.write_all(b",")?;
                }
                first = false;
                write_canonical(out, item, opts)?;
            }
            out.write_all(b"]")
        }
        Value::Object(map) => {
            // Sort keys, drop ignored, recurse.
            let mut entries: Vec<(&String, &Value)> = map
                .iter()
                .filter(|(k, _)| !opts.ignore_field_names.contains(&k.as_str()))
                .collect();
            entries.sort_by(|a, b| a.0.cmp(b.0));
            out.write_all(b"{")?;
            let mut first = true;
            for (k, v) in entries {
                if !first {
                    out.write_all(b",")?;
                }
                first = false;
                let escaped_key =
                    serde_json::to_string(k).expect("key serialization is infallible");
                out.write_all(escaped_key.as_bytes())?;
                out.write_all(b":")?;
                write_canonical(out, v, opts)?;
            }
            out.write_all(b"}")
        }
    }
}

#[cfg(test)]
mod tests {
    use serde_json::json;

    use super::*;

    /// Run `write_canonical` against an in-memory buffer and return the
    /// emitted bytes as a `String`.
    fn canon(v: &Value, opts: &HashOpts) -> String {
        let mut sink: Vec<u8> = Vec::new();
        write_canonical(&mut sink, v, opts).unwrap();
        String::from_utf8(sink).unwrap()
    }

    /// `canon` with `HashOpts::default()`, for tests that do not exercise
    /// the ignore-list.
    fn canon_default(v: &Value) -> String {
        canon(v, &HashOpts::default())
    }

    /// Keys supplied out of order come back sorted.
    #[test]
    fn sorts_object_keys() {
        let input = json!({"b": 1, "a": 2, "c": 3});
        let expected = r#"{"a":2,"b":1,"c":3}"#;
        assert_eq!(canon_default(&input), expected);
    }

    /// Arrays are positional: element order must survive untouched.
    #[test]
    fn preserves_array_order() {
        let input = json!([3, 1, 2]);
        assert_eq!(canon_default(&input), "[3,1,2]");
    }

    /// A key on the ignore-list disappears together with its value.
    #[test]
    fn drops_ignored_keys() {
        let opts = HashOpts::new().ignore("cache_control");
        let input = json!({"a": 1, "cache_control": {"type": "ephemeral"}, "b": 2});
        assert_eq!(canon(&input, &opts), r#"{"a":1,"b":2}"#);
    }

    /// Sorting and ignoring both apply below the top level.
    #[test]
    fn recurses_into_nested_objects() {
        let opts = HashOpts::new().ignore("drop_me");
        let input = json!({"outer": {"y": 1, "x": 2, "drop_me": "x"}});
        assert_eq!(canon(&input, &opts), r#"{"outer":{"x":2,"y":1}}"#);
    }

    /// Every scalar kind is emitted in its plain JSON spelling.
    #[test]
    fn null_bool_number_string() {
        let cases = [
            (json!(null), "null"),
            (json!(true), "true"),
            (json!(false), "false"),
            (json!(42), "42"),
            (json!(2.5), "2.5"),
            (json!("hi"), r#""hi""#),
        ];
        for (input, expected) in &cases {
            assert_eq!(canon_default(input), *expected);
        }
    }

    /// Embedded double quotes are backslash-escaped.
    #[test]
    fn escapes_strings_properly() {
        let input = json!("she said \"hi\"");
        let expected = r#""she said \"hi\"""#;
        assert_eq!(canon_default(&input), expected);
    }

    /// Empty containers render as bare bracket pairs.
    #[test]
    fn empty_collections() {
        let cases = [(json!({}), "{}"), (json!([]), "[]")];
        for (input, expected) in &cases {
            assert_eq!(canon_default(input), *expected);
        }
    }
}