Skip to main content

kaish_types/
bytes.rs

1//! Binary-data helpers: the base64 JSON envelope and the hex dump.
2//!
3//! kaish is UTF-8 text end to end, so binary values must be encoded the moment
4//! they cross a text boundary. Two encodings, two boundaries:
5//!
6//! - **base64 envelope** — how a `Value::Bytes` / a binary result serializes for
7//!   `--json` and MCP. Self-describing so an agent can act on it:
8//!   `{"_type":"bytes","encoding":"base64","data":"…","len":N}`.
9//! - **hex dump** — how binary renders for a *human* (REPL): `xxd`-style offset
10//!   / hex columns / ASCII gutter.
11//!
12//! See `docs/binary-data.md`.
13
14use base64::Engine;
15use base64::engine::general_purpose::STANDARD;
16
/// The `_type` discriminator marking a base64 byte envelope.
///
/// [`bytes_to_envelope`] writes this value into the `_type` field and
/// [`envelope_to_bytes`] rejects any object whose `_type` differs from it.
pub const BYTES_ENVELOPE_TYPE: &str = "bytes";
19
20/// Encode raw bytes as the self-describing base64 JSON envelope.
21pub fn bytes_to_envelope(data: &[u8]) -> serde_json::Value {
22    let mut map = serde_json::Map::new();
23    map.insert(
24        "_type".to_string(),
25        serde_json::Value::String(BYTES_ENVELOPE_TYPE.to_string()),
26    );
27    map.insert(
28        "encoding".to_string(),
29        serde_json::Value::String("base64".to_string()),
30    );
31    map.insert(
32        "data".to_string(),
33        serde_json::Value::String(STANDARD.encode(data)),
34    );
35    map.insert(
36        "len".to_string(),
37        serde_json::Value::Number((data.len() as u64).into()),
38    );
39    serde_json::Value::Object(map)
40}
41
42/// Recognize the base64 byte envelope and decode it back to raw bytes.
43///
44/// Returns `None` for anything that isn't a well-formed envelope (so a plain
45/// JSON object stays a `Value::Json`). A malformed `data` field — an envelope
46/// claiming base64 it can't decode — is a hard `None`, never a silent empty
47/// vector.
48pub fn envelope_to_bytes(value: &serde_json::Value) -> Option<Vec<u8>> {
49    let obj = value.as_object()?;
50    if obj.get("_type")?.as_str()? != BYTES_ENVELOPE_TYPE {
51        return None;
52    }
53    // Only base64 is defined today; an unknown encoding is not our envelope.
54    if obj.get("encoding").and_then(|e| e.as_str()) != Some("base64") {
55        return None;
56    }
57    let data = obj.get("data")?.as_str()?;
58    STANDARD.decode(data).ok()
59}
60
/// Render bytes as an `xxd`-style hex dump for human display.
///
/// Each output line covers up to 16 input bytes and consists of:
///
/// 1. the offset of the line's first byte as (at least) 8 lowercase hex
///    digits, followed by two spaces;
/// 2. the bytes as lowercase two-digit hex pairs, each followed by a space,
///    with one extra space between the 8th and 9th byte; a short final line
///    is padded with blanks so the gutter stays aligned;
/// 3. the ASCII gutter between `|` delimiters, one character per byte
///    actually present: bytes `0x20..=0x7e` are shown as-is, everything else
///    as `.`.
///
/// Lines are joined with `\n`; there is no trailing newline. Empty input
/// yields an empty string.
pub fn hex_dump(data: &[u8]) -> String {
    // Scoped to this function: only needed for `write!` into a `String`.
    use std::fmt::Write as _;

    const PER_LINE: usize = 16;
    const HALF: usize = PER_LINE / 2;
    // offset (8) + gap (2) + hex cells (3 each) + mid-column gap (1)
    // + " |" (2) + gutter (1 each) + "|" (1) + newline (1).
    const MAX_LINE_WIDTH: usize = 8 + 2 + PER_LINE * 3 + 1 + 2 + PER_LINE + 1 + 1;

    // Reserve the whole output up front instead of growing line by line.
    let mut out = String::with_capacity(data.len().div_ceil(PER_LINE) * MAX_LINE_WIDTH);

    for (line_no, chunk) in data.chunks(PER_LINE).enumerate() {
        if line_no > 0 {
            out.push('\n');
        }

        // Formatting straight into `out` avoids a temporary `String` per
        // field. Writing to a `String` cannot fail, so the result is ignored.
        let _ = write!(out, "{:08x}  ", line_no * PER_LINE);

        for i in 0..PER_LINE {
            if i == HALF {
                out.push(' ');
            }
            match chunk.get(i) {
                Some(b) => {
                    let _ = write!(out, "{b:02x} ");
                }
                // Past the end of a short final chunk: keep the columns aligned.
                None => out.push_str("   "),
            }
        }

        out.push_str(" |");
        out.extend(chunk.iter().map(|&b| {
            if b == b' ' || b.is_ascii_graphic() {
                char::from(b)
            } else {
                '.'
            }
        }));
        out.push('|');
    }
    out
}
95
#[cfg(test)]
mod tests {
    use super::*;

    /// Encoding then decoding yields the original bytes, and the envelope
    /// carries the documented fields.
    #[test]
    fn envelope_round_trips() {
        let data = vec![0u8, 1, 2, 255, 128, 64];
        let env = bytes_to_envelope(&data);
        assert_eq!(env["_type"], "bytes");
        assert_eq!(env["encoding"], "base64");
        // Pin the payload so a change of base64 alphabet/padding is caught
        // even if encode and decode drift together.
        assert_eq!(env["data"], "AAEC/4BA");
        assert_eq!(env["len"], 6);
        assert_eq!(envelope_to_bytes(&env), Some(data));
    }

    /// Objects without the `bytes` discriminator are left alone.
    #[test]
    fn plain_object_is_not_an_envelope() {
        let plain = serde_json::json!({"name": "amy", "len": 3});
        assert_eq!(envelope_to_bytes(&plain), None);
        // A blob envelope is a different _type and must not be mistaken for bytes.
        let blob = serde_json::json!({"_type": "blob", "id": "x", "size": 1});
        assert_eq!(envelope_to_bytes(&blob), None);
    }

    /// Undecodable `data` is rejected outright rather than decoded to `[]`.
    #[test]
    fn malformed_base64_is_none_not_empty() {
        let bad = serde_json::json!({"_type":"bytes","encoding":"base64","data":"!!notb64!!","len":3});
        assert_eq!(envelope_to_bytes(&bad), None);
    }

    /// Only `"base64"` is a recognized `encoding`.
    #[test]
    fn unknown_encoding_is_not_our_envelope() {
        let hexenc = serde_json::json!({"_type":"bytes","encoding":"hex","data":"00ff","len":2});
        assert_eq!(envelope_to_bytes(&hexenc), None);
    }

    /// Exact layout of a full line followed by a short, padded line.
    #[test]
    fn hex_dump_layout() {
        // 0..18 so we get a full line plus a short second line.
        let data: Vec<u8> = (0u8..18).collect();
        let dump = hex_dump(&data);
        let lines: Vec<&str> = dump.lines().collect();
        assert_eq!(lines.len(), 2);
        // All of 0x00..=0x11 are control bytes, so the gutter is dots only.
        assert_eq!(
            lines[0],
            "00000000  00 01 02 03 04 05 06 07  08 09 0a 0b 0c 0d 0e 0f  |................|"
        );
        // The short line is blank-padded so its gutter lines up with line 0.
        let expected_short = format!("00000010  10 11{}|..|", " ".repeat(45));
        assert_eq!(lines[1], expected_short);
        assert!(!dump.ends_with('\n'));
    }

    /// Printable bytes (including space) pass through; the rest become dots.
    #[test]
    fn hex_dump_ascii_gutter() {
        let dump = hex_dump(b"AB\x00C");
        // 'A' 'B' printable, 0x00 -> '.', 'C' printable
        assert!(dump.ends_with("|AB.C|"), "gutter: {dump}");

        // Boundaries of the printable range: 0x20 and 0x7e shown, 0x7f and
        // 0xff masked.
        let edges = hex_dump(b" ~\x7f\xff");
        assert!(edges.ends_with("| ~..|"), "gutter: {edges}");
    }

    /// No bytes, no output — not even an offset column.
    #[test]
    fn hex_dump_empty_input() {
        assert_eq!(hex_dump(&[]), "");
    }
}