Skip to main content

devboy_format_pipeline/
near_ref.rs

1//! Type-2 (near-duplicate) reference hints — Paper 2 §Near-reference.
2//!
3//! When the L0 cache holds a body that is *not* byte-identical to the
4//! current response but differs only in a few enumerated fields, we
5//! emit a compact delta hint instead of the full body or a full
6//! reference hint. The canonical case is pipeline polling: two calls
7//! to `get_pipeline(id=42)` differing only in `status` and `duration`.
8//!
9//! ```text
//! > [near-ref: tc_42, status: pending→success, duration: 12→34]
11//! ```
12//!
13//! Eligibility (per the paper):
14//!
15//! - Both inputs must be valid JSON objects.
16//! - Their top-level key sets must match (no fields added or removed).
17//! - The **delta payload** (rendered hint minus framing) must be
18//!   ≤ 30 bytes.
19//! - The full response must be ≥ 500 bytes — savings on shorter
20//!   bodies do not justify the delta-extraction cost.
21//!
22//! The eligibility predicate is conservative: when in doubt, this
23//! module returns `None` and the caller falls back to either a full
24//! reference hint or fresh body.
25
26use std::collections::BTreeSet;
27
/// A single top-level field whose scalar value changed between two JSON
/// objects, with both sides already rendered as text for the hint.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct DeltaField {
    /// Name of the top-level key that changed.
    pub key: String,
    /// Textual rendering of the value in the older (cached) body.
    pub old: String,
    /// Textual rendering of the value in the newer (current) body.
    pub new: String,
}
35
/// Tunable thresholds that decide whether a near-ref hint may be emitted.
#[derive(Clone, Copy, Debug)]
pub struct NearRefConfig {
    /// Upper bound, in bytes, on the rendered delta payload
    /// (`status: a→b, …`). Larger deltas are rejected.
    pub max_delta_bytes: usize,
    /// Lower bound, in bytes, on the input body length. Shorter bodies
    /// are cheaper to send in full than to run through delta extraction.
    pub min_body_bytes: usize,
}
46
47impl Default for NearRefConfig {
48    fn default() -> Self {
49        Self {
50            // 50-byte ceiling fits the canonical pipeline-polling case
51            // (status: pending→success, duration: 12→34 ≈ 34 bytes
52            // including framing) while still rejecting cases where
53            // half a dozen fields drifted. Paper text uses 30 as the
54            // payload-only bound, but the runtime sums framing in too.
55            max_delta_bytes: 50,
56            min_body_bytes: 500,
57        }
58    }
59}
60
61/// Try to extract a delta between `old_body` and `new_body`. Returns the
62/// list of differing fields when *all* eligibility checks pass; otherwise
63/// `None` so the caller can fall back to a full ref / fresh body.
64pub fn extract_delta(
65    old_body: &str,
66    new_body: &str,
67    config: &NearRefConfig,
68) -> Option<Vec<DeltaField>> {
69    if new_body.len() < config.min_body_bytes {
70        return None;
71    }
72    let old: serde_json::Value = serde_json::from_str(old_body.trim_start()).ok()?;
73    let new: serde_json::Value = serde_json::from_str(new_body.trim_start()).ok()?;
74    let old_obj = old.as_object()?;
75    let new_obj = new.as_object()?;
76
77    // Top-level key sets must match. A new field appearing means the
78    // shape shifted and a delta hint would be misleading.
79    let old_keys: BTreeSet<&str> = old_obj.keys().map(|s| s.as_str()).collect();
80    let new_keys: BTreeSet<&str> = new_obj.keys().map(|s| s.as_str()).collect();
81    if old_keys != new_keys {
82        return None;
83    }
84
85    let mut deltas = Vec::new();
86    for (k, new_val) in new_obj {
87        let old_val = match old_obj.get(k) {
88            Some(v) => v,
89            None => unreachable!("key sets are equal"),
90        };
91        if old_val == new_val {
92            continue;
93        }
94        // Only scalar diffs round-trip cleanly into a hint. A nested
95        // object difference would balloon the hint and obscure the
96        // delta — refuse the hint and let the caller pass the full body.
97        if !is_scalar(old_val) || !is_scalar(new_val) {
98            return None;
99        }
100        deltas.push(DeltaField {
101            key: k.clone(),
102            old: scalar_to_string(old_val),
103            new: scalar_to_string(new_val),
104        });
105    }
106
107    // Size the eligibility gate against the *actually rendered* delta
108    // payload — the previous +4 approximation under-counted the UTF-8
109    // arrow (`→` is 3 bytes) and the `", "`/`": "` separators, which
110    // could let near-ref hints exceed `max_delta_bytes`.
111    if rendered_delta_bytes(&deltas) > config.max_delta_bytes {
112        return None;
113    }
114
115    // No diff at all → caller should emit a full byte-identical hint
116    // via the regular L0 path, not a near-ref. Returning `None` here
117    // lets the caller distinguish.
118    if deltas.is_empty() {
119        return None;
120    }
121    Some(deltas)
122}
123
124/// Size in bytes of the delta payload as it appears in the rendered hint
125/// (i.e. the part after `> [near-ref: <id>` and before the closing `]`).
126/// Mirrors [`render_near_ref_hint`] precisely so `max_delta_bytes` gates
127/// what actually goes on the wire, not an approximation.
128fn rendered_delta_bytes(deltas: &[DeltaField]) -> usize {
129    let mut frag = String::new();
130    for d in deltas {
131        frag.push_str(", ");
132        frag.push_str(&d.key);
133        frag.push_str(": ");
134        frag.push_str(&d.old);
135        frag.push('→');
136        frag.push_str(&d.new);
137    }
138    frag.len()
139}
140
141fn is_scalar(v: &serde_json::Value) -> bool {
142    matches!(
143        v,
144        serde_json::Value::Null
145            | serde_json::Value::Bool(_)
146            | serde_json::Value::Number(_)
147            | serde_json::Value::String(_)
148    )
149}
150
151fn scalar_to_string(v: &serde_json::Value) -> String {
152    match v {
153        serde_json::Value::Null => String::new(),
154        serde_json::Value::Bool(b) => b.to_string(),
155        serde_json::Value::Number(n) => n.to_string(),
156        serde_json::Value::String(s) => s.clone(),
157        _ => v.to_string(),
158    }
159}
160
161/// Render the near-ref hint for the agent. Single line, matches the
162/// `> [near-ref: <id>, <field>: <old>→<new>, …]` format documented in
163/// Paper 2 §Near-reference.
164pub fn render_near_ref_hint(reference_id: &str, deltas: &[DeltaField]) -> String {
165    let mut out = String::new();
166    out.push_str("> [near-ref: ");
167    out.push_str(reference_id);
168    for d in deltas {
169        out.push_str(", ");
170        out.push_str(&d.key);
171        out.push_str(": ");
172        out.push_str(&d.old);
173        out.push('→');
174        out.push_str(&d.new);
175    }
176    out.push(']');
177    out
178}
179
#[cfg(test)]
mod tests {
    use super::*;

    /// Default thresholds, shared by every eligibility test.
    fn cfg() -> NearRefConfig {
        NearRefConfig::default()
    }

    /// Build a pipeline-shaped JSON body. The `name` and `preview`
    /// fields are padding so the body clears `min_body_bytes`.
    fn long_pipeline_body(status: &str, duration: u64) -> String {
        let name = "p".repeat(20);
        let preview = "x".repeat(400);
        format!(
            r#"{{"id":42,"name":"{}","status":"{}","duration":{},"url":"https://example.com/pipelines/42","commit_sha":"deadbeefdeadbeefdeadbeefdeadbeef","ref":"refs/heads/main","author":"alice","triggered_by":"webhook","preview":"{}"}}"#,
            name, status, duration, preview
        )
    }

    /// The canonical two-field pipeline-polling delta used by the
    /// rendering tests.
    fn pipeline_deltas() -> Vec<DeltaField> {
        vec![
            DeltaField {
                key: "status".into(),
                old: "pending".into(),
                new: "success".into(),
            },
            DeltaField {
                key: "duration".into(),
                old: "12".into(),
                new: "34".into(),
            },
        ]
    }

    #[test]
    fn extracts_status_and_duration_delta_for_pipeline_polling() {
        let before = long_pipeline_body("pending", 12);
        let after = long_pipeline_body("success", 34);
        let deltas = extract_delta(&before, &after, &cfg())
            .expect("pipeline polling bodies should be eligible for a near-ref");
        assert_eq!(deltas.len(), 2);
        let changed: BTreeSet<&str> = deltas.iter().map(|d| d.key.as_str()).collect();
        assert!(changed.contains("status"));
        assert!(changed.contains("duration"));
    }

    #[test]
    fn refuses_when_too_short() {
        // Both bodies are far below the 500-byte `min_body_bytes` floor.
        let before = r#"{"a":1,"b":2}"#;
        let after = r#"{"a":1,"b":3}"#;
        assert!(extract_delta(before, after, &cfg()).is_none());
    }

    #[test]
    fn refuses_when_keys_differ() {
        let filler = "x".repeat(600);
        let before = format!(r#"{{"a":1,"long":"{}"}}"#, filler);
        let after = format!(r#"{{"a":1,"different":"{}"}}"#, filler);
        assert!(extract_delta(&before, &after, &cfg()).is_none());
    }

    #[test]
    fn refuses_when_nested_value_changes() {
        let filler = "x".repeat(600);
        let before = format!(r#"{{"meta":{{"k":1}},"pad":"{}"}}"#, filler);
        let after = format!(r#"{{"meta":{{"k":2}},"pad":"{}"}}"#, filler);
        assert!(extract_delta(&before, &after, &cfg()).is_none());
    }

    #[test]
    fn refuses_when_delta_blob_too_large() {
        // One 80-byte string field changes on both sides, which alone
        // exceeds the 50-byte delta ceiling.
        let filler = "p".repeat(600);
        let before = format!(r#"{{"x":"{}","pad":"{}"}}"#, "a".repeat(80), filler);
        let after = format!(r#"{{"x":"{}","pad":"{}"}}"#, "b".repeat(80), filler);
        assert!(extract_delta(&before, &after, &cfg()).is_none());
    }

    #[test]
    fn returns_none_for_byte_identical_inputs() {
        // A zero-delta comparison belongs to the regular byte-identical
        // hint path; it must not be reported as a near-ref.
        let body = long_pipeline_body("ok", 10);
        assert!(extract_delta(&body, &body, &cfg()).is_none());
    }

    #[test]
    fn render_format_matches_paper_spec() {
        let hint = render_near_ref_hint("tc_42", &pipeline_deltas());
        assert_eq!(
            hint,
            "> [near-ref: tc_42, status: pending→success, duration: 12→34]"
        );
    }

    #[test]
    fn hint_size_under_paper_budget() {
        // Paper 2 §Hint Cost Model targets ≤ 18 cl100k_base tokens for a
        // typical near-ref hint. Byte length stands in as a proxy here
        // (≤ 70 bytes ≈ 18 tokens on average for ASCII).
        let hint = render_near_ref_hint("tc_42", &pipeline_deltas());
        assert!(
            hint.len() <= 70,
            "near-ref hint too long: {} bytes",
            hint.len()
        );
    }
}