llm-message-hash 0.1.0

Stable canonical hash of LLM request/message structures. Recursive key-sorting JSON canonicalization + sha256, with per-provider ignore-lists so semantically-equal Anthropic/OpenAI/Bedrock requests produce the same hash. Useful for cache keys and idempotency.
Documentation
//! End-to-end shape checks.

use serde_json::json;

use llm_message_hash::{
    hash_canonical, hash_canonical_hex, hash_canonical_hex_with, hash_canonical_with, HashOpts,
};

/// Two objects that differ only in the order of their keys (at the top level
/// and inside nested message objects) must produce the same hash.
#[test]
fn key_order_does_not_affect_hash() {
    let model_first = json!({"model": "claude", "messages": [{"role": "user", "content": "hi"}]});
    let reordered = json!({"messages": [{"content": "hi", "role": "user"}], "model": "claude"});

    let model_first_hash = hash_canonical(&model_first);
    let reordered_hash = hash_canonical(&reordered);

    assert_eq!(model_first_hash, reordered_hash);
}

/// Arrays are ordered: the same elements in a different order must hash
/// differently.
#[test]
fn array_order_does_affect_hash() {
    let ascending_hash = hash_canonical(&json!([1, 2, 3]));
    let descending_hash = hash_canonical(&json!([3, 2, 1]));

    assert_ne!(ascending_hash, descending_hash);
}

/// With `HashOpts::anthropic()`, a `cache_control` marker on a content block
/// does not change the hash; with the plain hex hash it does.
#[test]
fn anthropic_opts_ignore_cache_control() {
    // Both requests share everything except the single content block.
    let request_around = |content_block: serde_json::Value| {
        json!({
            "model": "claude-sonnet-4-5",
            "messages": [{
                "role": "user",
                "content": [content_block],
            }],
        })
    };

    let marked = request_around(
        json!({"type": "text", "text": "hi", "cache_control": {"type": "ephemeral"}}),
    );
    let unmarked = request_around(json!({"type": "text", "text": "hi"}));

    let anthropic = HashOpts::anthropic();
    let marked_ignoring = hash_canonical_hex_with(&marked, &anthropic);
    let unmarked_ignoring = hash_canonical_hex_with(&unmarked, &anthropic);
    assert_eq!(marked_ignoring, unmarked_ignoring);

    // Negative control: without the ignore-list the marker is significant.
    let marked_plain = hash_canonical_hex(&marked);
    let unmarked_plain = hash_canonical_hex(&unmarked);
    assert_ne!(marked_plain, unmarked_plain);
}

/// With `HashOpts::openai()`, two responses that differ only in `id` and
/// `created` hash identically.
///
/// A negative control (mirroring the Anthropic test in this file) checks that
/// the same two payloads hash differently under the plain hex hash, so the
/// equality above is attributable to the ignore-list rather than to the
/// fields never being hashed at all.
#[test]
fn openai_opts_ignore_response_metadata() {
    let resp_1 = json!({
        "id": "chatcmpl-abc",
        "created": 1700000000,
        "model": "gpt-4",
        "choices": [{"message": {"role": "assistant", "content": "ok"}}],
    });
    let resp_2 = json!({
        "id": "chatcmpl-xyz",
        "created": 1800000000,
        "model": "gpt-4",
        "choices": [{"message": {"role": "assistant", "content": "ok"}}],
    });
    let opts = HashOpts::openai();
    assert_eq!(
        hash_canonical_hex_with(&resp_1, &opts),
        hash_canonical_hex_with(&resp_2, &opts)
    );
    // But WITHOUT the ignore, they should differ
    assert_ne!(hash_canonical_hex(&resp_1), hash_canonical_hex(&resp_2));
}

/// The hex form is exactly 64 characters long and uses only `0-9` / `a-f`.
#[test]
fn hex_output_is_64_lowercase_chars() {
    let digest = hash_canonical_hex(&json!({"a": 1}));

    assert_eq!(digest.len(), 64);

    let only_lowercase_hex = digest
        .chars()
        .all(|ch| matches!(ch, '0'..='9' | 'a'..='f'));
    assert!(only_lowercase_hex);
}

/// Hashing `{}` is repeatable and equals the sha256 of the two-byte text `{}`.
#[test]
fn empty_object_has_stable_hash() {
    // Known sha256 of "{}"
    const SHA256_OF_EMPTY_OBJECT: &str =
        "44136fa355b3678a1146ad16f7e8649e94fb4fc21fe77e8310c060f61caaff8a";

    let first = hash_canonical_hex(&json!({}));
    let second = hash_canonical_hex(&json!({}));

    assert_eq!(first, second);
    assert_eq!(first, SHA256_OF_EMPTY_OBJECT);
}

/// An ignored key is dropped even when it sits two objects deep: documents
/// that differ only in that key's value hash identically.
#[test]
fn nested_ignore_works_at_any_depth() {
    // Same nesting each time; only the value under the ignored key varies.
    let nested_with = |dropped: &str| {
        json!({
            "outer": {
                "inner": {
                    "drop_me": dropped,
                    "keep_me": "same",
                }
            }
        })
    };

    let first = nested_with("value_a");
    let second = nested_with("value_b_different");

    let ignoring_drop_me = HashOpts::new().ignore("drop_me");
    let first_hash = hash_canonical_with(&first, &ignoring_drop_me);
    let second_hash = hash_canonical_with(&second, &ignoring_drop_me);

    assert_eq!(first_hash, second_hash);
}

/// A provider preset can be extended by chaining `.ignore(..)`: after adding
/// `"metadata"`, a request with that key hashes the same as one without it.
#[test]
fn ignore_list_can_be_extended_fluently() {
    let request_with_metadata = json!({
        "model": "c",
        "metadata": {"user_id": "abc"},
        "messages": [],
    });
    let request_without_metadata = json!({
        "model": "c",
        "messages": [],
    });

    let extended = HashOpts::anthropic().ignore("metadata");
    let hash_with = hash_canonical_with(&request_with_metadata, &extended);
    let hash_without = hash_canonical_with(&request_without_metadata, &extended);

    assert_eq!(hash_with, hash_without);
}

/// Changing message content (a field that is not ignored) must change the
/// hash.
#[test]
fn changing_a_real_field_changes_the_hash() {
    let request_saying = |content: &str| {
        json!({"model": "claude", "messages": [{"role": "user", "content": content}]})
    };

    let hello_hash = hash_canonical(&request_saying("hello"));
    let world_hash = hash_canonical(&request_saying("world"));

    assert_ne!(hello_hash, world_hash);
}

/// Non-ASCII string content (here an astral-plane emoji) hashes consistently
/// and is significant to the hash.
///
/// Comparing two identical literals alone would only show determinism, so the
/// test also checks the same document parsed from JSON text that spells the
/// emoji as a `\uXXXX` surrogate-pair escape, and a document with different
/// unicode content.
#[test]
fn unicode_strings_round_trip() {
    let a = json!({"text": "hello 🦀"});
    let b = json!({"text": "hello 🦀"});
    assert_eq!(hash_canonical(&a), hash_canonical(&b));

    // U+1F980 (crab) written as the UTF-16 surrogate pair D83E DD80; once
    // parsed it is the same string, so it must hash the same as the literal.
    let escaped: serde_json::Value = serde_json::from_str(r#"{"text": "hello \ud83e\udd80"}"#)
        .expect("hard-coded JSON text is well-formed");
    assert_eq!(hash_canonical(&a), hash_canonical(&escaped));

    // A different emoji is different content and must not hash the same.
    let different = json!({"text": "hello 🐍"});
    assert_ne!(hash_canonical(&a), hash_canonical(&different));
}