use crate::kind::ReviewerKind;
use crate::path::ArtifactPath;
use std::collections::BTreeMap;
use std::fmt;
/// Schema identifier written as the `schema:` frontmatter value by
/// [`ArtifactRecord::render`]; [`ArtifactRecord::parse`] rejects any other value.
pub const SCHEMA_TAG: &str = "koala-artifact/v1";
/// A review artifact: a command, its exit code and output, and the metadata
/// that goes with them.
///
/// Serialised by [`ArtifactRecord::render`] and read back by
/// [`ArtifactRecord::parse`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ArtifactRecord {
/// Supplies the `kind`, `name` and `round` frontmatter values.
pub path: ArtifactPath,
/// Reviewer identifier, stored verbatim as the `reviewer` frontmatter value.
pub reviewer: String,
/// Timestamp text, stored verbatim; this file does not parse or validate it.
pub timestamp: String,
/// Optional commit identifier; the `commit` key is omitted when this is `None`.
pub commit: Option<String>,
/// Command as an argument vector, one element per argument.
pub command: Vec<String>,
/// Exit status, stored as the `exit` frontmatter value.
pub exit_code: i32,
/// Output text; stored base64-encoded under the `output_b64` key.
pub output: String,
/// Hash string, stored verbatim; this file neither computes nor verifies it.
pub hash: String,
}
impl ArtifactRecord {
pub fn render(&self) -> String {
let mut out = String::new();
out.push_str("---\n");
out.push_str(&format!("schema: {SCHEMA_TAG}\n"));
out.push_str(&format!("kind: {}\n", self.path.kind));
out.push_str(&format!("name: {}\n", self.path.name));
out.push_str(&format!("reviewer: {}\n", self.reviewer));
out.push_str(&format!("round: {}\n", self.path.round));
out.push_str(&format!("timestamp: {}\n", self.timestamp));
if let Some(c) = &self.commit {
out.push_str(&format!("commit: {c}\n"));
}
out.push_str(&format!("exit: {}\n", self.exit_code));
out.push_str(&format!("hash: {}\n", self.hash));
out.push_str(&format!(
"command: {}\n",
encode_string_array(&self.command)
));
out.push_str(&format!(
"output_b64: {}\n",
b64_encode(self.output.as_bytes())
));
out.push_str("---\n\n");
out.push_str(&format!(
"# {} review: {}\n\n",
self.path.kind.title(),
self.path.name
));
out.push_str("## Command\n\n");
out.push_str(&indent_block(&shell_quote(&self.command), " "));
out.push_str("\n\n## Exit\n\n");
out.push_str(&format!("{}\n\n", self.exit_code));
out.push_str("## Output\n\n");
if self.output.is_empty() {
out.push_str(" (empty)\n");
} else {
out.push_str(&indent_block(self.output.trim_end_matches('\n'), " "));
out.push('\n');
}
out.push_str("\n## Hash\n\n");
out.push_str(&format!("`{}`\n", self.hash));
out
}
pub fn parse(text: &str) -> Result<Self, ParseError> {
let (front, _body) = split_frontmatter(text).ok_or(ParseError::MissingFrontmatter)?;
let map = parse_kv_lines(front)?;
let schema = map
.get("schema")
.ok_or(ParseError::MissingField("schema"))?;
if schema != SCHEMA_TAG {
return Err(ParseError::UnknownSchema(schema.clone()));
}
let kind: ReviewerKind = map
.get("kind")
.ok_or(ParseError::MissingField("kind"))?
.parse()
.map_err(ParseError::BadKind)?;
let name = map
.get("name")
.ok_or(ParseError::MissingField("name"))?
.clone();
let round: u32 = map
.get("round")
.ok_or(ParseError::MissingField("round"))?
.parse()
.map_err(|e: std::num::ParseIntError| ParseError::BadInt("round", e.to_string()))?;
let path =
ArtifactPath::new(round, kind, name).map_err(|e| ParseError::BadPath(e.to_string()))?;
let reviewer = map
.get("reviewer")
.ok_or(ParseError::MissingField("reviewer"))?
.clone();
let timestamp = map
.get("timestamp")
.ok_or(ParseError::MissingField("timestamp"))?
.clone();
let commit = map.get("commit").cloned();
let exit_code: i32 = map
.get("exit")
.ok_or(ParseError::MissingField("exit"))?
.parse()
.map_err(|e: std::num::ParseIntError| ParseError::BadInt("exit", e.to_string()))?;
let hash = map
.get("hash")
.ok_or(ParseError::MissingField("hash"))?
.clone();
let command_raw = map
.get("command")
.ok_or(ParseError::MissingField("command"))?;
let command = decode_string_array(command_raw).map_err(ParseError::BadCommand)?;
let output = match map.get("output_b64") {
Some(b) if !b.is_empty() => {
let bytes = b64_decode(b).map_err(ParseError::BadOutputB64)?;
String::from_utf8_lossy(&bytes).into_owned()
}
_ => String::new(),
};
Ok(Self {
path,
reviewer,
timestamp,
commit,
command,
exit_code,
output,
hash,
})
}
}
/// Reasons [`ArtifactRecord::parse`] can reject its input.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ParseError {
/// The text has no `---`-delimited frontmatter block.
MissingFrontmatter,
/// A required frontmatter key is absent; carries the key name.
MissingField(&'static str),
/// The `schema` value is not [`SCHEMA_TAG`]; carries the value found.
UnknownSchema(String),
/// The `kind` value failed to parse; carries the message from the kind parser.
BadKind(String),
/// An integer-valued key failed to parse; carries the key name and the error text.
BadInt(&'static str, String),
/// `ArtifactPath::new` rejected the round/kind/name combination; carries its error text.
BadPath(String),
/// The `command` value could not be decoded as a string array; carries the reason.
BadCommand(String),
/// The `output_b64` value could not be base64-decoded; carries the reason.
BadOutputB64(String),
/// A non-blank frontmatter line contained no `:`; carries the offending line.
BadKvLine(String),
}
impl fmt::Display for ParseError {
    /// Writes a one-line, human-readable description of the failure.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            // These two carry a complete message from the component that failed,
            // so it is passed through unchanged.
            Self::BadKind(s) | Self::BadPath(s) => write!(f, "{s}"),

            // Structural problems with the frontmatter itself.
            Self::MissingFrontmatter => {
                write!(f, "artifact is missing the `---` frontmatter block")
            }
            Self::BadKvLine(s) => write!(f, "frontmatter line is not `key: value`: {s}"),
            Self::MissingField(k) => write!(f, "frontmatter is missing required key `{k}`"),

            // Problems with the value of a specific key.
            Self::UnknownSchema(s) => write!(
                f,
                "unsupported artifact schema `{s}`; expected `{SCHEMA_TAG}`"
            ),
            Self::BadInt(k, s) => write!(f, "key `{k}` is not an integer: {s}"),
            Self::BadCommand(s) => write!(f, "key `command` is not a JSON string array: {s}"),
            Self::BadOutputB64(s) => write!(f, "key `output_b64` is not valid base64: {s}"),
        }
    }
}

// No source error is exposed; the default trait methods are sufficient.
impl std::error::Error for ParseError {}
/// Splits `text` into `(frontmatter, body)`.
///
/// The text must start with a `---` line. The frontmatter runs up to the next
/// line that is exactly `---`, and the body is everything after that line.
/// Returns `None` when either delimiter is missing.
fn split_frontmatter(text: &str) -> Option<(&str, &str)> {
    let after_opening = text.strip_prefix("---\n")?;
    // `split_once` cuts at the first closing delimiter and drops it from both halves.
    after_opening.split_once("\n---\n")
}
/// Parses frontmatter text into a key → value map, one `key: value` per line.
///
/// Blank lines are skipped. Each remaining line is split at its **first**
/// colon, so a value may itself contain `:` or `": "`. One space after the
/// colon is treated as part of the separator; when the colon is not followed
/// by a space (`key:value`), leading whitespace of the value is trimmed
/// instead. Keys are trimmed. If a key repeats, the last occurrence wins.
///
/// # Errors
///
/// Returns [`ParseError::BadKvLine`] with the offending line when a non-blank
/// line contains no colon.
fn parse_kv_lines(s: &str) -> Result<BTreeMap<String, String>, ParseError> {
    let mut out = BTreeMap::new();
    for line in s.lines() {
        if line.trim().is_empty() {
            continue;
        }
        // Always split at the first colon. Searching for ": " first would pick a
        // later separator for input such as `command:["a: b"]` and fold part of
        // the value into the key.
        let (key, rest) = line
            .split_once(':')
            .ok_or_else(|| ParseError::BadKvLine(line.to_string()))?;
        let value = rest.strip_prefix(' ').unwrap_or_else(|| rest.trim_start());
        out.insert(key.trim().to_string(), value.to_string());
    }
    Ok(out)
}
/// Encodes `items` as a compact JSON-style array of strings, e.g. `["a","b"]`.
///
/// Quotes, backslashes, newline, carriage return and tab use their short
/// escapes; any other character below U+0020 is written as `\u00XX`. All other
/// characters are copied through unchanged.
fn encode_string_array(items: &[String]) -> String {
    use std::fmt::Write;

    let mut encoded = String::from("[");
    // Empty before the first element, a comma before every later one.
    let mut separator = "";
    for item in items {
        encoded.push_str(separator);
        separator = ",";

        encoded.push('"');
        for ch in item.chars() {
            let short_escape = match ch {
                '"' => Some("\\\""),
                '\\' => Some("\\\\"),
                '\n' => Some("\\n"),
                '\r' => Some("\\r"),
                '\t' => Some("\\t"),
                _ => None,
            };
            match short_escape {
                Some(escape) => encoded.push_str(escape),
                None if (ch as u32) < 0x20 => {
                    write!(&mut encoded, "\\u{:04x}", ch as u32).unwrap();
                }
                None => encoded.push(ch),
            }
        }
        encoded.push('"');
    }
    encoded.push(']');
    encoded
}
/// Decodes a JSON array of strings such as `["a", "b\n"]`.
///
/// Surrounding whitespace and whitespace between elements are ignored. All
/// JSON string escapes are understood: `\"`, `\\`, `\/`, `\b`, `\f`, `\n`,
/// `\r`, `\t` and `\uXXXX`, including UTF-16 surrogate pairs for characters
/// outside the Basic Multilingual Plane. A trailing comma before the closing
/// `]` is tolerated.
///
/// # Errors
///
/// Returns a description of the problem when the input is not wrapped in
/// `[...]`, an element is not a quoted string, a string or escape is
/// unterminated, an escape is unknown, a `\u` escape is not exactly four hex
/// digits, or a surrogate is unpaired.
fn decode_string_array(s: &str) -> Result<Vec<String>, String> {
    // Reads the four hex digits that follow `\u`.
    fn read_hex4(chars: &mut impl Iterator<Item = char>) -> Result<u32, String> {
        let mut hex = String::with_capacity(4);
        for _ in 0..4 {
            hex.push(chars.next().ok_or("unterminated \\u escape")?);
        }
        // `from_str_radix` on its own would also accept a leading `+`.
        if !hex.bytes().all(|b| b.is_ascii_hexdigit()) {
            return Err(format!("bad \\u escape `{hex}`: expected four hex digits"));
        }
        u32::from_str_radix(&hex, 16).map_err(|e| format!("bad \\u escape `{hex}`: {e}"))
    }

    let s = s.trim();
    let inner = s
        .strip_prefix('[')
        .and_then(|s| s.strip_suffix(']'))
        .ok_or_else(|| format!("expected `[...]`, got `{s}`"))?;
    if inner.trim().is_empty() {
        return Ok(Vec::new());
    }

    let mut out = Vec::new();
    let mut chars = inner.chars().peekable();
    loop {
        // Skip whitespace before an element.
        while chars.next_if(|c| c.is_whitespace()).is_some() {}
        match chars.peek() {
            // Only reachable after a trailing comma, which is tolerated.
            None => break,
            Some('"') => {
                chars.next();
                let mut buf = String::new();
                loop {
                    match chars.next() {
                        Some('"') => break,
                        Some('\\') => match chars.next() {
                            Some('"') => buf.push('"'),
                            Some('\\') => buf.push('\\'),
                            Some('/') => buf.push('/'),
                            Some('b') => buf.push('\u{0008}'),
                            Some('f') => buf.push('\u{000C}'),
                            Some('n') => buf.push('\n'),
                            Some('r') => buf.push('\r'),
                            Some('t') => buf.push('\t'),
                            Some('u') => {
                                let first = read_hex4(&mut chars)?;
                                let code = if (0xD800..0xDC00).contains(&first) {
                                    // High surrogate: JSON encodes characters above
                                    // U+FFFF as two consecutive `\u` escapes.
                                    match (chars.next(), chars.next()) {
                                        (Some('\\'), Some('u')) => {}
                                        _ => {
                                            return Err(format!("unpaired surrogate {first:#x}"));
                                        }
                                    }
                                    let second = read_hex4(&mut chars)?;
                                    if !(0xDC00..0xE000).contains(&second) {
                                        return Err(format!("unpaired surrogate {first:#x}"));
                                    }
                                    0x10000 + ((first - 0xD800) << 10) + (second - 0xDC00)
                                } else {
                                    first
                                };
                                // A lone low surrogate is rejected here.
                                buf.push(
                                    char::from_u32(code)
                                        .ok_or_else(|| format!("invalid unicode {code:#x}"))?,
                                );
                            }
                            Some(c) => return Err(format!("unknown escape `\\{c}`")),
                            None => return Err("unterminated escape".into()),
                        },
                        Some(c) => buf.push(c),
                        None => return Err("unterminated string".into()),
                    }
                }
                out.push(buf);

                // Skip whitespace after the element, then expect `,` or the end.
                while chars.next_if(|c| c.is_whitespace()).is_some() {}
                match chars.peek() {
                    None => break,
                    Some(',') => {
                        chars.next();
                    }
                    Some(c) => return Err(format!("expected `,` or `]`, got `{c}`")),
                }
            }
            Some(c) => return Err(format!("expected `\"`, got `{c}`")),
        }
    }
    Ok(out)
}
/// The standard base64 alphabet, indexed by 6-bit value.
const B64_ALPHABET: &[u8; 64] = b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

/// Encodes `bytes` as base64 using [`B64_ALPHABET`], padded with `=` to a
/// multiple of four characters. Empty input yields an empty string.
pub(crate) fn b64_encode(bytes: &[u8]) -> String {
    let mut encoded = String::with_capacity(bytes.len().div_ceil(3) * 4);
    for chunk in bytes.chunks(3) {
        // Pack up to three bytes into the top 24 bits, zero-filling what is missing.
        let second = chunk.get(1).copied().unwrap_or(0);
        let third = chunk.get(2).copied().unwrap_or(0);
        let word = (u32::from(chunk[0]) << 16) | (u32::from(second) << 8) | u32::from(third);
        let sextet = |shift: u32| B64_ALPHABET[((word >> shift) & 63) as usize] as char;

        encoded.push(sextet(18));
        encoded.push(sextet(12));
        // Positions with no input byte behind them are padding.
        encoded.push(if chunk.len() > 1 { sextet(6) } else { '=' });
        encoded.push(if chunk.len() > 2 { sextet(0) } else { '=' });
    }
    encoded
}
/// Decodes standard, `=`-padded base64.
///
/// Whitespace anywhere in the input is ignored. Empty input (after removing
/// whitespace) decodes to an empty vector.
///
/// # Errors
///
/// Returns a description of the problem when the length is not a multiple of
/// four, a character is outside the base64 alphabet, or padding is malformed.
/// `=` may only appear as the last one or two characters of the final group.
pub(crate) fn b64_decode(s: &str) -> Result<Vec<u8>, String> {
    // Maps one alphabet character to its 6-bit value.
    fn val(c: u8) -> Result<u8, String> {
        Ok(match c {
            b'A'..=b'Z' => c - b'A',
            b'a'..=b'z' => c - b'a' + 26,
            b'0'..=b'9' => c - b'0' + 52,
            b'+' => 62,
            b'/' => 63,
            _ => return Err(format!("bad base64 char `{}`", c as char)),
        })
    }

    let s: String = s.chars().filter(|c| !c.is_whitespace()).collect();
    if s.is_empty() {
        return Ok(Vec::new());
    }
    if s.len() % 4 != 0 {
        return Err(format!("base64 length {} not multiple of 4", s.len()));
    }

    let bytes = s.as_bytes();
    let last_group = bytes.len() / 4 - 1;
    let mut out = Vec::with_capacity(s.len() / 4 * 3);
    for (index, group) in bytes.chunks_exact(4).enumerate() {
        // Padding is counted from the end of the group only. An `=` anywhere
        // else reaches `val` below and is rejected as a bad character.
        let pad = group.iter().rev().take_while(|&&c| c == b'=').count();
        if pad > 2 {
            return Err(format!("base64 group has {pad} padding characters, at most 2 allowed"));
        }
        if pad > 0 && index != last_group {
            return Err("base64 padding before the final group".to_string());
        }

        let mut n = 0u32;
        for &c in &group[..4 - pad] {
            n = (n << 6) | u32::from(val(c)?);
        }
        // Shift the missing sextets in as zeros so the bytes line up as usual.
        n <<= 6 * pad;

        // `n` holds 24 bits: big-endian bytes 1..4, minus one byte per pad.
        let decoded = n.to_be_bytes();
        out.extend_from_slice(&decoded[1..4 - pad]);
    }
    Ok(out)
}
/// Joins `args` into a single display line, single-quoting any argument a
/// POSIX-style shell would not read back as one literal word.
///
/// An argument is quoted when it is empty or contains whitespace, a quote or
/// backslash, `$` or a backtick, a shell operator (`| & ; < > ( )`), a glob or
/// brace character (`* ? [ ] { }`), or one of `~ # !`. Embedded single quotes
/// are written as `'\''`. Other arguments are emitted unchanged.
fn shell_quote(args: &[String]) -> String {
    // True for characters that change meaning when left unquoted in a shell.
    fn needs_quoting(c: char) -> bool {
        c.is_whitespace()
            || matches!(
                c,
                '"' | '\''
                    | '\\'
                    | '$'
                    | '`'
                    | '|'
                    | '&'
                    | ';'
                    | '<'
                    | '>'
                    | '('
                    | ')'
                    | '*'
                    | '?'
                    | '['
                    | ']'
                    | '{'
                    | '}'
                    | '~'
                    | '#'
                    | '!'
            )
    }

    args.iter()
        .map(|a| {
            if a.is_empty() || a.chars().any(needs_quoting) {
                // Inside single quotes only `'` is special: close the quote, emit
                // an escaped quote, then reopen.
                let escaped = a.replace('\'', "'\\''");
                format!("'{escaped}'")
            } else {
                a.clone()
            }
        })
        .collect::<Vec<_>>()
        .join(" ")
}
/// Prefixes every line of `s` with `prefix`.
///
/// Lines are joined with `\n` and the result has no trailing newline; empty
/// input yields an empty string.
fn indent_block(s: &str, prefix: &str) -> String {
    let mut indented = String::new();
    for (index, line) in s.lines().enumerate() {
        if index > 0 {
            indented.push('\n');
        }
        indented.push_str(prefix);
        indented.push_str(line);
    }
    indented
}
// Unit tests for the render/parse round trip and the hand-rolled codecs.
#[cfg(test)]
mod tests {
use super::*;
use crate::kind::ReviewerKind;
// Builds a fully populated record with empty output; tests override fields as needed.
fn sample() -> ArtifactRecord {
ArtifactRecord {
path: ArtifactPath::new(1, ReviewerKind::Concept, "no-stale-refs").unwrap(),
reviewer: "agent:concept-bot".into(),
timestamp: "2026-05-07T14:32:01Z".into(),
commit: Some("a3f8c12".into()),
command: vec![
"grep".into(),
"-rn".into(),
"old_function_name".into(),
"crates/".into(),
],
exit_code: 1,
output: String::new(),
hash: "sha256:abcdef".into(),
}
}
// A record with multi-line output must survive render followed by parse unchanged.
#[test]
fn parses_back_what_we_render() {
let mut r = sample();
r.output = "alpha\nbeta\n".into();
let s = r.render();
let parsed = ArtifactRecord::parse(&s).unwrap();
assert_eq!(parsed, r);
}
// Empty output renders as an empty `output_b64` value and must parse back as empty.
#[test]
fn parses_back_empty_output() {
let r = sample();
let parsed = ArtifactRecord::parse(&r.render()).unwrap();
assert_eq!(parsed, r);
}
// Covers every padding length (0, 1 and 2 `=`) plus non-text bytes.
#[test]
fn b64_round_trip() {
let cases: &[&[u8]] = &[
b"",
b"a",
b"ab",
b"abc",
b"abcd",
b"hello world\n",
&[0x00, 0xff, b'b', b'i', b'n'],
];
for bytes in cases {
let enc = b64_encode(bytes);
let dec = b64_decode(&enc).unwrap();
assert_eq!(&dec[..], *bytes, "round trip failed for {bytes:?}");
}
}
// Text that does not start with a `---` line has no frontmatter.
#[test]
fn rejects_missing_frontmatter() {
assert_eq!(
ArtifactRecord::parse("# hello"),
Err(ParseError::MissingFrontmatter)
);
}
// The schema tag is checked before any other key is required.
#[test]
fn rejects_unknown_schema() {
let s = "---\nschema: not-koala/v9\n---\n\n# x\n";
assert!(matches!(
ArtifactRecord::parse(s),
Err(ParseError::UnknownSchema(_))
));
}
// Quotes, backslashes, tabs and empty strings must survive encode followed by decode.
#[test]
fn json_array_round_trip_with_specials() {
let v = vec![
"grep".to_string(),
"with \"quote\" and \\back".to_string(),
"tab\there".to_string(),
"".to_string(),
];
let s = encode_string_array(&v);
let back = decode_string_array(&s).unwrap();
assert_eq!(back, v);
}
}