Skip to main content

koala_artifact/
record.rs

1//! Artifact record: the on-disk markdown produced by `record` and
2//! consumed by `verify`.
3//!
4//! Layout (`.review/round-N/<kind>-<name>.md`):
5//!
6//! ```text
7//! ---
8//! schema: koala-artifact/v1
9//! kind: concept
10//! name: no-stale-refs
11//! reviewer: agent:concept-bot
12//! round: 1
13//! timestamp: 2026-05-07T14:32:01Z
14//! commit: a3f8c12
15//! exit: 1
16//! hash: sha256:<hex>
//! command: ["grep","-rn","foo","crates/"]
//! output_b64: <base64 of the raw output bytes>
18//! ---
19//!
20//! # Concept review: no-stale-refs
21//! ## Command
22//!     grep -rn foo crates/
23//! ## Exit
24//! 1
25//! ## Output
26//!     (rendered output for humans)
27//! ## Hash
28//! `sha256:<hex>`
29//! ```
30//!
31//! Frontmatter is the source of truth for `verify`. The body sections are
32//! human-readable mirrors and not re-parsed.
33//!
34//! `output_b64` carries the raw output bytes so verify can produce a diff
35//! on hash mismatch. It is a separate field from the rendered `## Output`
36//! section because a base64 blob is round-trippable while the rendered
37//! markdown is not.
38
39use crate::kind::ReviewerKind;
40use crate::path::ArtifactPath;
41use std::collections::BTreeMap;
42use std::fmt;
43
44pub const SCHEMA_TAG: &str = "koala-artifact/v1";
45
/// One recorded reviewer run, as stored in an artifact markdown file.
///
/// `render` serialises a record to frontmatter plus a human-readable body;
/// `parse` rebuilds it from the frontmatter alone.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ArtifactRecord {
    /// Round, reviewer kind and name; written as the `round`, `kind` and
    /// `name` frontmatter keys.
    pub path: ArtifactPath,
    /// Reviewer identity, written verbatim as the `reviewer` key.
    pub reviewer: String,
    /// Timestamp string, written verbatim as the `timestamp` key (it is not
    /// parsed or validated here).
    pub timestamp: String,
    /// Commit identifier; the `commit` key is omitted entirely when `None`.
    pub commit: Option<String>,
    /// Command as an argument vector; stored as a single-line JSON string
    /// array under the `command` key.
    pub command: Vec<String>,
    /// Exit code, written as the `exit` key.
    pub exit_code: i32,
    /// Captured output. Stored base64-encoded under `output_b64` and also
    /// rendered (indented) in the `## Output` body section.
    pub output: String,
    /// Hash string, written verbatim as the `hash` key.
    pub hash: String,
}
57
58impl ArtifactRecord {
59    pub fn render(&self) -> String {
60        let mut out = String::new();
61        out.push_str("---\n");
62        out.push_str(&format!("schema: {SCHEMA_TAG}\n"));
63        out.push_str(&format!("kind: {}\n", self.path.kind));
64        out.push_str(&format!("name: {}\n", self.path.name));
65        out.push_str(&format!("reviewer: {}\n", self.reviewer));
66        out.push_str(&format!("round: {}\n", self.path.round));
67        out.push_str(&format!("timestamp: {}\n", self.timestamp));
68        if let Some(c) = &self.commit {
69            out.push_str(&format!("commit: {c}\n"));
70        }
71        out.push_str(&format!("exit: {}\n", self.exit_code));
72        out.push_str(&format!("hash: {}\n", self.hash));
73        out.push_str(&format!(
74            "command: {}\n",
75            encode_string_array(&self.command)
76        ));
77        out.push_str(&format!(
78            "output_b64: {}\n",
79            b64_encode(self.output.as_bytes())
80        ));
81        out.push_str("---\n\n");
82
83        out.push_str(&format!(
84            "# {} review: {}\n\n",
85            self.path.kind.title(),
86            self.path.name
87        ));
88        out.push_str("## Command\n\n");
89        out.push_str(&indent_block(&shell_quote(&self.command), "    "));
90        out.push_str("\n\n## Exit\n\n");
91        out.push_str(&format!("{}\n\n", self.exit_code));
92        out.push_str("## Output\n\n");
93        if self.output.is_empty() {
94            out.push_str("    (empty)\n");
95        } else {
96            out.push_str(&indent_block(self.output.trim_end_matches('\n'), "    "));
97            out.push('\n');
98        }
99        out.push_str("\n## Hash\n\n");
100        out.push_str(&format!("`{}`\n", self.hash));
101        out
102    }
103
104    /// Parse the on-disk markdown back to its structured form. Verify reads
105    /// `command` and `hash` from the frontmatter; everything else is
106    /// preserved for diffing / rendering.
107    pub fn parse(text: &str) -> Result<Self, ParseError> {
108        let (front, _body) = split_frontmatter(text).ok_or(ParseError::MissingFrontmatter)?;
109        let map = parse_kv_lines(front)?;
110
111        let schema = map
112            .get("schema")
113            .ok_or(ParseError::MissingField("schema"))?;
114        if schema != SCHEMA_TAG {
115            return Err(ParseError::UnknownSchema(schema.clone()));
116        }
117
118        let kind: ReviewerKind = map
119            .get("kind")
120            .ok_or(ParseError::MissingField("kind"))?
121            .parse()
122            .map_err(ParseError::BadKind)?;
123        let name = map
124            .get("name")
125            .ok_or(ParseError::MissingField("name"))?
126            .clone();
127        let round: u32 = map
128            .get("round")
129            .ok_or(ParseError::MissingField("round"))?
130            .parse()
131            .map_err(|e: std::num::ParseIntError| ParseError::BadInt("round", e.to_string()))?;
132        let path =
133            ArtifactPath::new(round, kind, name).map_err(|e| ParseError::BadPath(e.to_string()))?;
134        let reviewer = map
135            .get("reviewer")
136            .ok_or(ParseError::MissingField("reviewer"))?
137            .clone();
138        let timestamp = map
139            .get("timestamp")
140            .ok_or(ParseError::MissingField("timestamp"))?
141            .clone();
142        let commit = map.get("commit").cloned();
143        let exit_code: i32 = map
144            .get("exit")
145            .ok_or(ParseError::MissingField("exit"))?
146            .parse()
147            .map_err(|e: std::num::ParseIntError| ParseError::BadInt("exit", e.to_string()))?;
148        let hash = map
149            .get("hash")
150            .ok_or(ParseError::MissingField("hash"))?
151            .clone();
152        let command_raw = map
153            .get("command")
154            .ok_or(ParseError::MissingField("command"))?;
155        let command = decode_string_array(command_raw).map_err(ParseError::BadCommand)?;
156        let output = match map.get("output_b64") {
157            Some(b) if !b.is_empty() => {
158                let bytes = b64_decode(b).map_err(ParseError::BadOutputB64)?;
159                String::from_utf8_lossy(&bytes).into_owned()
160            }
161            _ => String::new(),
162        };
163
164        Ok(Self {
165            path,
166            reviewer,
167            timestamp,
168            commit,
169            command,
170            exit_code,
171            output,
172            hash,
173        })
174    }
175}
176
/// Reasons `ArtifactRecord::parse` can reject an artifact file.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ParseError {
    /// The text has no `---`-delimited frontmatter block.
    MissingFrontmatter,
    /// A required frontmatter key (named by the payload) is absent.
    MissingField(&'static str),
    /// The `schema` value (payload) differs from `SCHEMA_TAG`.
    UnknownSchema(String),
    /// The `kind` value failed to parse; the payload is the parser's message.
    BadKind(String),
    /// The named key is not a valid integer; the second field is the
    /// integer parser's message.
    BadInt(&'static str, String),
    /// `ArtifactPath::new` rejected the round/kind/name combination; the
    /// payload is its error rendered as text.
    BadPath(String),
    /// The `command` value is not a JSON string array.
    BadCommand(String),
    /// The `output_b64` value is not valid base64.
    BadOutputB64(String),
    /// A non-blank frontmatter line (payload) contains no `:` separator.
    BadKvLine(String),
}
189
190impl fmt::Display for ParseError {
191    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
192        match self {
193            Self::MissingFrontmatter => {
194                write!(f, "artifact is missing the `---` frontmatter block")
195            }
196            Self::MissingField(k) => write!(f, "frontmatter is missing required key `{k}`"),
197            Self::UnknownSchema(s) => write!(
198                f,
199                "unsupported artifact schema `{s}`; expected `{SCHEMA_TAG}`"
200            ),
201            Self::BadKind(s) => write!(f, "{s}"),
202            Self::BadInt(k, s) => write!(f, "key `{k}` is not an integer: {s}"),
203            Self::BadPath(s) => write!(f, "{s}"),
204            Self::BadCommand(s) => write!(f, "key `command` is not a JSON string array: {s}"),
205            Self::BadOutputB64(s) => write!(f, "key `output_b64` is not valid base64: {s}"),
206            Self::BadKvLine(s) => write!(f, "frontmatter line is not `key: value`: {s}"),
207        }
208    }
209}
210
211impl std::error::Error for ParseError {}
212
/// Split `text` into `(frontmatter, body)` around the `---` delimiter lines.
///
/// The text must start with a `---` line. The frontmatter runs up to (not
/// including) the next line that is exactly `---`; the body is everything
/// after that line. The returned frontmatter has no trailing line ending.
///
/// Both LF and CRLF line endings are accepted, the closing `---` may be the
/// last line of the file without a trailing newline, and the frontmatter
/// may be empty (the closing `---` directly follows the opening one).
///
/// Returns `None` when the opening or closing delimiter is missing.
fn split_frontmatter(text: &str) -> Option<(&str, &str)> {
    let rest = text
        .strip_prefix("---\n")
        .or_else(|| text.strip_prefix("---\r\n"))?;

    // Walk line by line, tracking the byte offset of each line start so the
    // returned slices borrow directly from `text`.
    let mut offset = 0;
    for line in rest.split_inclusive('\n') {
        let content = line.strip_suffix('\n').unwrap_or(line);
        let content = content.strip_suffix('\r').unwrap_or(content);
        if content == "---" {
            // Drop the line ending that terminates the last frontmatter line.
            let front = &rest[..offset];
            let front = front.strip_suffix('\n').unwrap_or(front);
            let front = front.strip_suffix('\r').unwrap_or(front);
            let body = &rest[offset + line.len()..];
            return Some((front, body));
        }
        offset += line.len();
    }
    None
}
220
221fn parse_kv_lines(s: &str) -> Result<BTreeMap<String, String>, ParseError> {
222    let mut out = BTreeMap::new();
223    for line in s.lines() {
224        if line.trim().is_empty() {
225            continue;
226        }
227        let (k, v) = line
228            .split_once(": ")
229            .or_else(|| line.split_once(':').map(|(k, v)| (k, v.trim_start())))
230            .ok_or_else(|| ParseError::BadKvLine(line.to_string()))?;
231        out.insert(k.trim().to_string(), v.to_string());
232    }
233    Ok(out)
234}
235
/// Render `items` as a single-line JSON array of strings.
///
/// Writing the list as inline JSON lets the frontmatter stay one
/// `key: value` per line (no YAML dependency needed to read it back) while
/// remaining valid YAML. Quotes, backslashes and all characters below
/// U+0020 are escaped; everything else is emitted as-is.
fn encode_string_array(items: &[String]) -> String {
    use std::fmt::Write;

    let quoted: Vec<String> = items
        .iter()
        .map(|item| {
            let mut q = String::with_capacity(item.len() + 2);
            q.push('"');
            for ch in item.chars() {
                match ch {
                    '"' => q.push_str("\\\""),
                    '\\' => q.push_str("\\\\"),
                    '\n' => q.push_str("\\n"),
                    '\r' => q.push_str("\\r"),
                    '\t' => q.push_str("\\t"),
                    // Remaining control characters have no short escape.
                    ch if u32::from(ch) < 0x20 => {
                        write!(q, "\\u{:04x}", u32::from(ch)).unwrap();
                    }
                    ch => q.push(ch),
                }
            }
            q.push('"');
            q
        })
        .collect();

    format!("[{}]", quoted.join(","))
}
265
/// Parse a single-line JSON array of strings (the inverse of
/// `encode_string_array`).
///
/// Surrounding whitespace and whitespace between elements is ignored. All
/// JSON string escapes are understood: `\"`, `\\`, `\/`, `\b`, `\f`, `\n`,
/// `\r`, `\t` and `\uXXXX`, including UTF-16 surrogate pairs such as
/// `\ud83d\ude00`. A trailing comma before `]` is tolerated.
///
/// # Errors
///
/// Returns a human-readable message when the input is not wrapped in
/// `[...]`, an element is not a quoted string, a string or escape is
/// unterminated, an escape is unknown, a `\u` escape is not four hex
/// digits, or a surrogate is unpaired.
fn decode_string_array(s: &str) -> Result<Vec<String>, String> {
    type Input<'a> = std::iter::Peekable<std::str::Chars<'a>>;

    // Consume exactly four hex digits following `\u`.
    fn hex4(chars: &mut Input<'_>) -> Result<u32, String> {
        let mut hex = String::with_capacity(4);
        for _ in 0..4 {
            hex.push(chars.next().ok_or("unterminated \\u escape")?);
        }
        // `from_str_radix` alone would accept a leading `+`, which is not
        // valid in a JSON escape, so check the digits explicitly first.
        if !hex.chars().all(|c| c.is_ascii_hexdigit()) {
            return Err(format!("bad \\u escape `{hex}`: invalid hex digit"));
        }
        u32::from_str_radix(&hex, 16).map_err(|e| format!("bad \\u escape `{hex}`: {e}"))
    }

    // Advance past any run of whitespace.
    fn skip_ws(chars: &mut Input<'_>) {
        while chars.next_if(|c| c.is_whitespace()).is_some() {}
    }

    let s = s.trim();
    let inner = s
        .strip_prefix('[')
        .and_then(|rest| rest.strip_suffix(']'))
        .ok_or_else(|| format!("expected `[...]`, got `{s}`"))?;
    if inner.trim().is_empty() {
        return Ok(Vec::new());
    }

    let mut out = Vec::new();
    let mut chars = inner.chars().peekable();
    loop {
        // Each iteration reads one `"..."` element and its separator.
        skip_ws(&mut chars);
        match chars.next() {
            None => break,
            Some('"') => {}
            Some(c) => return Err(format!("expected `\"`, got `{c}`")),
        }

        let mut buf = String::new();
        loop {
            match chars.next() {
                Some('"') => break,
                Some('\\') => match chars.next() {
                    Some('"') => buf.push('"'),
                    Some('\\') => buf.push('\\'),
                    Some('/') => buf.push('/'),
                    Some('b') => buf.push('\u{8}'),
                    Some('f') => buf.push('\u{c}'),
                    Some('n') => buf.push('\n'),
                    Some('r') => buf.push('\r'),
                    Some('t') => buf.push('\t'),
                    Some('u') => {
                        let first = hex4(&mut chars)?;
                        let code = if (0xD800..=0xDBFF).contains(&first) {
                            // High surrogate: JSON encodes characters above
                            // U+FFFF as two consecutive `\u` escapes.
                            if chars.next() != Some('\\') || chars.next() != Some('u') {
                                return Err(format!("unpaired surrogate {first:#x}"));
                            }
                            let second = hex4(&mut chars)?;
                            if !(0xDC00..=0xDFFF).contains(&second) {
                                return Err(format!("unpaired surrogate {first:#x}"));
                            }
                            0x10000 + ((first - 0xD800) << 10) + (second - 0xDC00)
                        } else {
                            first
                        };
                        // A lone low surrogate is rejected here.
                        buf.push(
                            char::from_u32(code)
                                .ok_or_else(|| format!("invalid unicode {code:#x}"))?,
                        );
                    }
                    Some(c) => return Err(format!("unknown escape `\\{c}`")),
                    None => return Err("unterminated escape".into()),
                },
                Some(c) => buf.push(c),
                None => return Err("unterminated string".into()),
            }
        }
        out.push(buf);

        skip_ws(&mut chars);
        match chars.next() {
            None => break,
            Some(',') => {}
            Some(c) => return Err(format!("expected `,` or `]`, got `{c}`")),
        }
    }
    Ok(out)
}
340
const B64_ALPHABET: &[u8; 64] = b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

/// Encode `bytes` as padded base64 using the `A-Z a-z 0-9 + /` alphabet.
///
/// The output is a single line with no whitespace and is always a multiple
/// of four characters long; empty input yields an empty string.
pub(crate) fn b64_encode(bytes: &[u8]) -> String {
    // Pick the alphabet character for the 6-bit field starting at `shift`.
    let sextet = |word: u32, shift: u32| B64_ALPHABET[((word >> shift) & 63) as usize] as char;

    let mut encoded = String::with_capacity(bytes.len().div_ceil(3) * 4);
    for group in bytes.chunks(3) {
        // Pack up to three bytes into the top 24 bits' worth of `word`;
        // missing bytes in a short final group contribute zero bits.
        let word = match *group {
            [a] => u32::from(a) << 16,
            [a, b] => (u32::from(a) << 16) | (u32::from(b) << 8),
            [a, b, c] => (u32::from(a) << 16) | (u32::from(b) << 8) | u32::from(c),
            _ => unreachable!("chunks(3) yields between one and three bytes"),
        };
        encoded.push(sextet(word, 18));
        encoded.push(sextet(word, 12));
        encoded.push(if group.len() > 1 { sextet(word, 6) } else { '=' });
        encoded.push(if group.len() > 2 { sextet(word, 0) } else { '=' });
    }
    encoded
}
374
/// Decode padded base64 (`A-Z a-z 0-9 + /` alphabet) into raw bytes.
///
/// Whitespace anywhere in the input is ignored. Empty input decodes to an
/// empty vector.
///
/// # Errors
///
/// Returns a message when, after whitespace removal, the length is not a
/// multiple of four, a character is outside the alphabet, or `=` padding
/// is misplaced. Padding is only valid as the last one or two characters
/// of the final group (`xxx=` / `xx==`).
pub(crate) fn b64_decode(s: &str) -> Result<Vec<u8>, String> {
    // Map one alphabet character to its 6-bit value.
    fn val(c: u8) -> Result<u8, String> {
        Ok(match c {
            b'A'..=b'Z' => c - b'A',
            b'a'..=b'z' => c - b'a' + 26,
            b'0'..=b'9' => c - b'0' + 52,
            b'+' => 62,
            b'/' => 63,
            _ => return Err(format!("bad base64 char `{}`", c as char)),
        })
    }

    let s: String = s.chars().filter(|c| !c.is_whitespace()).collect();
    if s.is_empty() {
        return Ok(Vec::new());
    }
    if s.len() % 4 != 0 {
        return Err(format!("base64 length {} not multiple of 4", s.len()));
    }

    let bytes = s.as_bytes();
    let groups = bytes.len() / 4;
    let mut out = Vec::with_capacity(groups * 3);
    for (idx, group) in bytes.chunks_exact(4).enumerate() {
        // Count trailing `=`. Any `=` that is not trailing falls through to
        // `val` below and is rejected as a bad character.
        let pad = group.iter().rev().take_while(|&&c| c == b'=').count();
        if pad > 2 || (pad > 0 && idx + 1 != groups) {
            return Err(format!(
                "misplaced base64 padding in 4-character group {idx}"
            ));
        }

        let mut n = 0u32;
        for &c in &group[..4 - pad] {
            n = (n << 6) | u32::from(val(c)?);
        }
        // Shift in zero bits for the padded positions so the byte offsets
        // below are the same for full and short groups.
        n <<= 6 * pad;

        out.push(((n >> 16) & 0xff) as u8);
        if pad < 2 {
            out.push(((n >> 8) & 0xff) as u8);
        }
        if pad < 1 {
            out.push((n & 0xff) as u8);
        }
    }
    Ok(out)
}
419
/// Join `args` into one space-separated, POSIX-shell-safe command line for
/// display.
///
/// An argument is emitted bare only when it is non-empty and consists
/// solely of ASCII letters, digits and `_ - . / : = @ % + ,`. Anything else
/// (whitespace, quotes, `$`, globs, `;`, `|`, `&`, redirections, `~`,
/// non-ASCII, ...) is wrapped in single quotes, with embedded single quotes
/// written as `'\''`. Using an allow-list means no shell metacharacter can
/// slip through unquoted.
fn shell_quote(args: &[String]) -> String {
    // Characters that never need quoting in a POSIX shell word.
    fn is_safe(c: char) -> bool {
        c.is_ascii_alphanumeric()
            || matches!(c, '_' | '-' | '.' | '/' | ':' | '=' | '@' | '%' | '+' | ',')
    }

    args.iter()
        .map(|arg| {
            if !arg.is_empty() && arg.chars().all(is_safe) {
                arg.clone()
            } else {
                // Inside single quotes only `'` itself is special: close the
                // quote, emit an escaped quote, reopen.
                format!("'{}'", arg.replace('\'', "'\\''"))
            }
        })
        .collect::<Vec<_>>()
        .join(" ")
}
436
/// Prefix every line of `s` with `prefix`, joining the lines with `\n`.
///
/// Lines are taken from `str::lines`, so a final line ending is dropped
/// and the result never ends in a newline. Empty input yields an empty
/// string; blank lines inside `s` still receive the prefix.
fn indent_block(s: &str, prefix: &str) -> String {
    let mut indented = String::new();
    for (idx, line) in s.lines().enumerate() {
        if idx > 0 {
            indented.push('\n');
        }
        indented.push_str(prefix);
        indented.push_str(line);
    }
    indented
}
443
#[cfg(test)]
mod tests {
    use super::*;
    use crate::kind::ReviewerKind;

    /// Baseline record with empty output; tests override fields as needed.
    fn sample() -> ArtifactRecord {
        let command = ["grep", "-rn", "old_function_name", "crates/"]
            .iter()
            .map(|arg| arg.to_string())
            .collect();
        ArtifactRecord {
            path: ArtifactPath::new(1, ReviewerKind::Concept, "no-stale-refs").unwrap(),
            reviewer: "agent:concept-bot".into(),
            timestamp: "2026-05-07T14:32:01Z".into(),
            commit: Some("a3f8c12".into()),
            command,
            exit_code: 1,
            output: String::new(),
            hash: "sha256:abcdef".into(),
        }
    }

    /// Render → parse must reproduce a record with multi-line output.
    #[test]
    fn parses_back_what_we_render() {
        let record = ArtifactRecord {
            output: "alpha\nbeta\n".into(),
            ..sample()
        };
        let rendered = record.render();
        assert_eq!(ArtifactRecord::parse(&rendered).unwrap(), record);
    }

    /// Render → parse must also hold when the output is empty.
    #[test]
    fn parses_back_empty_output() {
        let record = sample();
        let rendered = record.render();
        assert_eq!(ArtifactRecord::parse(&rendered).unwrap(), record);
    }

    /// Encode → decode is the identity for every padding length and for
    /// non-UTF-8 bytes.
    #[test]
    fn b64_round_trip() {
        let cases: &[&[u8]] = &[
            b"",
            b"a",
            b"ab",
            b"abc",
            b"abcd",
            b"hello world\n",
            &[0x00, 0xff, b'b', b'i', b'n'],
        ];
        for bytes in cases {
            let decoded = b64_decode(&b64_encode(bytes)).unwrap();
            assert_eq!(&decoded[..], *bytes, "round trip failed for {bytes:?}");
        }
    }

    /// Text without a leading `---` block is rejected up front.
    #[test]
    fn rejects_missing_frontmatter() {
        let result = ArtifactRecord::parse("# hello");
        assert_eq!(result, Err(ParseError::MissingFrontmatter));
    }

    /// A schema tag other than `SCHEMA_TAG` is rejected.
    #[test]
    fn rejects_unknown_schema() {
        let text = "---\nschema: not-koala/v9\n---\n\n# x\n";
        let result = ArtifactRecord::parse(text);
        assert!(matches!(result, Err(ParseError::UnknownSchema(_))));
    }

    /// Quotes, backslashes, tabs and empty strings survive the JSON
    /// array encoding.
    #[test]
    fn json_array_round_trip_with_specials() {
        let original: Vec<String> = ["grep", "with \"quote\" and \\back", "tab\there", ""]
            .iter()
            .map(|item| item.to_string())
            .collect();
        let encoded = encode_string_array(&original);
        assert_eq!(decode_string_array(&encoded).unwrap(), original);
    }
}