Skip to main content

sim_lib_stream_fabric/
content_key.rs

1//! Content-addressed identities for distributed eval requests.
2
3use std::cmp::Ordering;
4
5use sim_kernel::{CanonicalKey, ContentId, Datum, EvalRequest, NumberLiteral, Symbol};
6
/// Content-addressed identity for an [`EvalRequest`].
///
/// A `ContentKey` wraps a small [`Datum`] that records the content id of the
/// canonical request datum. The canonical request includes the expression,
/// required capabilities in sorted order, consistency, mode, answer limit,
/// stream buffer, stream flag, and trace flag.
///
/// Excluded fields are deliberate: `deadline` is a scheduling bound, not part
/// of the requested work, and `result_shape` is currently a process-local
/// runtime value rather than stable content-addressed data.
///
/// Equality and hashing are derived from the wrapped datum, while the manual
/// [`Ord`] implementation compares the datum's canonical byte encoding.
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
pub struct ContentKey(Datum);
19
20impl ContentKey {
21    /// Derives a deterministic content key from the stable fields of `request`.
22    pub fn from_request(request: &EvalRequest) -> Self {
23        let id = request_content_datum(request)
24            .content_id()
25            .expect("content key request datum is canonical");
26        Self(content_id_datum(id))
27    }
28
29    /// Returns the datum representation of this key.
30    pub fn datum(&self) -> &Datum {
31        &self.0
32    }
33
34    /// Consumes this key and returns its datum representation.
35    pub fn into_datum(self) -> Datum {
36        self.0
37    }
38}
39
impl PartialOrd for ContentKey {
    /// Delegates to [`Ord::cmp`], so the result is always `Some` and the
    /// partial order agrees with the total order.
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        Some(self.cmp(other))
    }
}
45
46impl Ord for ContentKey {
47    fn cmp(&self, other: &Self) -> Ordering {
48        self.canonical_bytes().cmp(&other.canonical_bytes())
49    }
50}
51
52impl ContentKey {
53    fn canonical_bytes(&self) -> Vec<u8> {
54        self.0
55            .canonical_bytes()
56            .expect("content key datum is canonical")
57    }
58}
59
60fn request_content_datum(request: &EvalRequest) -> Datum {
61    let mut capabilities = request.required_capabilities.to_vec();
62    capabilities.sort();
63
64    Datum::Node {
65        tag: Symbol::qualified("fabric", "eval-request-content-key-v1"),
66        fields: vec![
67            field("expr", canonical_key_datum(&request.expr.canonical_key())),
68            field(
69                "required-capabilities",
70                Datum::Vector(
71                    capabilities
72                        .iter()
73                        .map(|capability| Datum::String(capability.as_str().to_owned()))
74                        .collect(),
75                ),
76            ),
77            field(
78                "consistency",
79                Datum::Symbol(request.consistency.as_symbol()),
80            ),
81            field("mode", Datum::Symbol(request.mode.as_symbol())),
82            field("answer-limit", optional_usize_datum(request.answer_limit)),
83            field("stream-buffer", optional_usize_datum(request.stream_buffer)),
84            field("stream", Datum::Bool(request.stream)),
85            field("trace", Datum::Bool(request.trace)),
86        ],
87    }
88}
89
90fn content_id_datum(id: ContentId) -> Datum {
91    Datum::Node {
92        tag: Symbol::qualified("fabric", "content-key"),
93        fields: vec![
94            field("algorithm", Datum::Symbol(id.algorithm)),
95            field("digest", Datum::Bytes(id.bytes.to_vec())),
96        ],
97    }
98}
99
100fn canonical_key_datum(key: &CanonicalKey) -> Datum {
101    match key {
102        CanonicalKey::Atom(tag) => tagged_node("atom", tag, Vec::new()),
103        CanonicalKey::Bool(tag, value) => {
104            tagged_node("bool", tag, vec![field("value", Datum::Bool(*value))])
105        }
106        CanonicalKey::Bytes(tag, bytes) => tagged_node(
107            "bytes",
108            tag,
109            vec![field("value", Datum::Bytes(bytes.clone()))],
110        ),
111        CanonicalKey::String(tag, value) => tagged_node(
112            "string",
113            tag,
114            vec![field("value", Datum::String(value.clone()))],
115        ),
116        CanonicalKey::Symbol(tag, symbol) => tagged_node(
117            "symbol",
118            tag,
119            vec![field("value", Datum::Symbol(symbol.clone()))],
120        ),
121        CanonicalKey::Pair(tag, left, right) => tagged_node(
122            "pair",
123            tag,
124            vec![
125                field("left", Datum::String(left.clone())),
126                field("right", Datum::String(right.clone())),
127            ],
128        ),
129        CanonicalKey::Compound(tag, items) => tagged_node(
130            "compound",
131            tag,
132            vec![field(
133                "items",
134                Datum::Vector(items.iter().map(canonical_key_datum).collect()),
135            )],
136        ),
137        CanonicalKey::CompoundNamed(tag, items) => tagged_node(
138            "compound-named",
139            tag,
140            vec![field(
141                "items",
142                Datum::Vector(
143                    items
144                        .iter()
145                        .map(|(name, value)| Datum::Node {
146                            tag: Symbol::qualified("fabric", "canonical-key-item"),
147                            fields: vec![
148                                field("name", Datum::String(name.clone())),
149                                field("value", canonical_key_datum(value)),
150                            ],
151                        })
152                        .collect(),
153                ),
154            )],
155        ),
156    }
157}
158
159fn tagged_node(kind: &str, tag: &str, mut fields: Vec<(Symbol, Datum)>) -> Datum {
160    fields.insert(0, field("tag", Datum::String(tag.to_owned())));
161    Datum::Node {
162        tag: Symbol::qualified("fabric", format!("canonical-key-{kind}")),
163        fields,
164    }
165}
166
167fn optional_usize_datum(value: Option<usize>) -> Datum {
168    value.map_or(Datum::Nil, usize_datum)
169}
170
171fn usize_datum(value: usize) -> Datum {
172    Datum::Number(NumberLiteral {
173        domain: Symbol::qualified("core", "usize"),
174        canonical: value.to_string(),
175    })
176}
177
178fn field(name: &str, value: Datum) -> (Symbol, Datum) {
179    (Symbol::new(name), value)
180}
181
#[cfg(test)]
mod tests {
    use sim_kernel::{
        CapabilityName, Consistency, EvalMode, EvalRequest, Expr, NumberLiteral, Symbol,
    };

    use super::ContentKey;

    /// Builds a request for `expr` with the given capabilities; every other
    /// field takes the baseline value shared by all tests in this module.
    fn base_req(expr: Expr, caps: &[&str]) -> EvalRequest {
        EvalRequest {
            expr,
            result_shape: None,
            required_capabilities: caps
                .iter()
                .map(|capability| CapabilityName::new(*capability))
                .collect(),
            deadline: None,
            consistency: Consistency::LocalFirst,
            mode: EvalMode::Eval,
            answer_limit: None,
            stream_buffer: None,
            stream: false,
            trace: false,
        }
    }

    /// Baseline request whose expression is the string literal `expr`.
    fn req(expr: &str, caps: &[&str]) -> EvalRequest {
        base_req(Expr::String(expr.to_owned()), caps)
    }

    /// Map entry keyed by the symbol `name`.
    fn entry(name: &str, value: Expr) -> (Expr, Expr) {
        (Expr::Symbol(Symbol::new(name)), value)
    }

    /// Baseline request carrying a model-call map expression and no
    /// required capabilities.
    fn model_req(task: &str, model: &str, temperature: &str) -> EvalRequest {
        let temperature = Expr::Number(NumberLiteral {
            domain: Symbol::qualified("numbers", "f64"),
            canonical: temperature.to_owned(),
        });
        base_req(
            Expr::Map(vec![
                entry("model-request", Expr::Bool(true)),
                entry("task", Expr::String(task.to_owned())),
                entry("messages", Expr::List(Vec::new())),
                entry("model", Expr::String(model.to_owned())),
                entry("temperature", temperature),
            ]),
            &[],
        )
    }

    #[test]
    fn identical_requests_produce_equal_content_keys() {
        assert_eq!(
            ContentKey::from_request(&req("hello", &["a", "b"])),
            ContentKey::from_request(&req("hello", &["a", "b"])),
        );
    }

    #[test]
    fn different_expressions_produce_distinct_keys() {
        assert_ne!(
            ContentKey::from_request(&req("hello", &["a"])),
            ContentKey::from_request(&req("world", &["a"])),
        );
    }

    #[test]
    fn capability_order_does_not_affect_key() {
        assert_eq!(
            ContentKey::from_request(&req("e", &["b", "a"])),
            ContentKey::from_request(&req("e", &["a", "b"])),
        );
    }

    #[test]
    fn different_capabilities_produce_distinct_keys() {
        assert_ne!(
            ContentKey::from_request(&req("e", &["a"])),
            ContentKey::from_request(&req("e", &["b"])),
        );
    }

    #[test]
    fn stream_flag_changes_content_key() {
        let streaming = EvalRequest {
            stream: true,
            ..req("e", &["a"])
        };
        assert_ne!(
            ContentKey::from_request(&streaming),
            ContentKey::from_request(&req("e", &["a"])),
        );
    }

    #[test]
    fn trace_flag_changes_content_key() {
        let traced = EvalRequest {
            trace: true,
            ..req("e", &["a"])
        };
        assert_ne!(
            ContentKey::from_request(&traced),
            ContentKey::from_request(&req("e", &["a"])),
        );
    }

    #[test]
    fn model_id_change_changes_content_key() {
        assert_ne!(
            ContentKey::from_request(&model_req("summarize x", "local/qwen3.5:4b", "0.1")),
            ContentKey::from_request(&model_req("summarize x", "local/qwen3.6:35b", "0.1")),
        );
    }

    #[test]
    fn model_param_change_changes_content_key() {
        assert_ne!(
            ContentKey::from_request(&model_req("summarize x", "local/qwen3.5:4b", "0.1")),
            ContentKey::from_request(&model_req("summarize x", "local/qwen3.5:4b", "0.9")),
        );
    }

    #[test]
    fn identical_model_request_reproduces_content_key() {
        assert_eq!(
            ContentKey::from_request(&model_req("summarize x", "local/qwen3.5:4b", "0.1")),
            ContentKey::from_request(&model_req("summarize x", "local/qwen3.5:4b", "0.1")),
        );
    }
}
289}