1use crate::kind::ReviewerKind;
40use crate::path::ArtifactPath;
41use std::collections::BTreeMap;
42use std::fmt;
43
/// Schema identifier written under the `schema` frontmatter key by
/// `ArtifactRecord::render` and required, verbatim, by `ArtifactRecord::parse`.
pub const SCHEMA_TAG: &str = "koala-artifact/v1";
45
/// One recorded reviewer command run, convertible to and from the artifact
/// text format (frontmatter plus Markdown body) via `render` and `parse`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ArtifactRecord {
    /// Supplies the `kind`, `name` and `round` values written to the frontmatter.
    pub path: ArtifactPath,
    /// Written verbatim under the `reviewer` frontmatter key.
    pub reviewer: String,
    /// Written verbatim under the `timestamp` key; no format is enforced in this file.
    pub timestamp: String,
    /// Optional value for the `commit` key; the key is omitted when this is `None`.
    pub commit: Option<String>,
    /// Command argument vector; stored as a JSON-style string array under
    /// `command` and shown shell-quoted in the rendered body.
    pub command: Vec<String>,
    /// Exit status, stored under the `exit` key.
    pub exit_code: i32,
    /// Captured output; stored base64-encoded under `output_b64` and shown
    /// indented in the rendered body.
    pub output: String,
    /// Stored verbatim under the `hash` key and repeated in the body.
    /// NOTE(review): how the hash is computed is not visible in this file.
    pub hash: String,
}
57
58impl ArtifactRecord {
59 pub fn render(&self) -> String {
60 let mut out = String::new();
61 out.push_str("---\n");
62 out.push_str(&format!("schema: {SCHEMA_TAG}\n"));
63 out.push_str(&format!("kind: {}\n", self.path.kind));
64 out.push_str(&format!("name: {}\n", self.path.name));
65 out.push_str(&format!("reviewer: {}\n", self.reviewer));
66 out.push_str(&format!("round: {}\n", self.path.round));
67 out.push_str(&format!("timestamp: {}\n", self.timestamp));
68 if let Some(c) = &self.commit {
69 out.push_str(&format!("commit: {c}\n"));
70 }
71 out.push_str(&format!("exit: {}\n", self.exit_code));
72 out.push_str(&format!("hash: {}\n", self.hash));
73 out.push_str(&format!(
74 "command: {}\n",
75 encode_string_array(&self.command)
76 ));
77 out.push_str(&format!(
78 "output_b64: {}\n",
79 b64_encode(self.output.as_bytes())
80 ));
81 out.push_str("---\n\n");
82
83 out.push_str(&format!(
84 "# {} review: {}\n\n",
85 self.path.kind.title(),
86 self.path.name
87 ));
88 out.push_str("## Command\n\n");
89 out.push_str(&indent_block(&shell_quote(&self.command), " "));
90 out.push_str("\n\n## Exit\n\n");
91 out.push_str(&format!("{}\n\n", self.exit_code));
92 out.push_str("## Output\n\n");
93 if self.output.is_empty() {
94 out.push_str(" (empty)\n");
95 } else {
96 out.push_str(&indent_block(self.output.trim_end_matches('\n'), " "));
97 out.push('\n');
98 }
99 out.push_str("\n## Hash\n\n");
100 out.push_str(&format!("`{}`\n", self.hash));
101 out
102 }
103
104 pub fn parse(text: &str) -> Result<Self, ParseError> {
108 let (front, _body) = split_frontmatter(text).ok_or(ParseError::MissingFrontmatter)?;
109 let map = parse_kv_lines(front)?;
110
111 let schema = map
112 .get("schema")
113 .ok_or(ParseError::MissingField("schema"))?;
114 if schema != SCHEMA_TAG {
115 return Err(ParseError::UnknownSchema(schema.clone()));
116 }
117
118 let kind: ReviewerKind = map
119 .get("kind")
120 .ok_or(ParseError::MissingField("kind"))?
121 .parse()
122 .map_err(ParseError::BadKind)?;
123 let name = map
124 .get("name")
125 .ok_or(ParseError::MissingField("name"))?
126 .clone();
127 let round: u32 = map
128 .get("round")
129 .ok_or(ParseError::MissingField("round"))?
130 .parse()
131 .map_err(|e: std::num::ParseIntError| ParseError::BadInt("round", e.to_string()))?;
132 let path =
133 ArtifactPath::new(round, kind, name).map_err(|e| ParseError::BadPath(e.to_string()))?;
134 let reviewer = map
135 .get("reviewer")
136 .ok_or(ParseError::MissingField("reviewer"))?
137 .clone();
138 let timestamp = map
139 .get("timestamp")
140 .ok_or(ParseError::MissingField("timestamp"))?
141 .clone();
142 let commit = map.get("commit").cloned();
143 let exit_code: i32 = map
144 .get("exit")
145 .ok_or(ParseError::MissingField("exit"))?
146 .parse()
147 .map_err(|e: std::num::ParseIntError| ParseError::BadInt("exit", e.to_string()))?;
148 let hash = map
149 .get("hash")
150 .ok_or(ParseError::MissingField("hash"))?
151 .clone();
152 let command_raw = map
153 .get("command")
154 .ok_or(ParseError::MissingField("command"))?;
155 let command = decode_string_array(command_raw).map_err(ParseError::BadCommand)?;
156 let output = match map.get("output_b64") {
157 Some(b) if !b.is_empty() => {
158 let bytes = b64_decode(b).map_err(ParseError::BadOutputB64)?;
159 String::from_utf8_lossy(&bytes).into_owned()
160 }
161 _ => String::new(),
162 };
163
164 Ok(Self {
165 path,
166 reviewer,
167 timestamp,
168 commit,
169 command,
170 exit_code,
171 output,
172 hash,
173 })
174 }
175}
176
/// Reasons `ArtifactRecord::parse` can reject an artifact.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ParseError {
    /// The text has no `---` fenced frontmatter block.
    MissingFrontmatter,
    /// A required frontmatter key (named by the payload) is absent.
    MissingField(&'static str),
    /// The `schema` value (carried in the payload) is not `SCHEMA_TAG`.
    UnknownSchema(String),
    /// The `kind` value failed to parse; the payload is the parser's message.
    BadKind(String),
    /// The named key is not a valid integer; the second field is the parse error text.
    BadInt(&'static str, String),
    /// `ArtifactPath::new` rejected the round/kind/name; the payload is its message.
    BadPath(String),
    /// The `command` value could not be decoded as a string array.
    BadCommand(String),
    /// The `output_b64` value could not be base64-decoded.
    BadOutputB64(String),
    /// A non-blank frontmatter line (carried in the payload) has no `:` separator.
    BadKvLine(String),
}
189
190impl fmt::Display for ParseError {
191 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
192 match self {
193 Self::MissingFrontmatter => {
194 write!(f, "artifact is missing the `---` frontmatter block")
195 }
196 Self::MissingField(k) => write!(f, "frontmatter is missing required key `{k}`"),
197 Self::UnknownSchema(s) => write!(
198 f,
199 "unsupported artifact schema `{s}`; expected `{SCHEMA_TAG}`"
200 ),
201 Self::BadKind(s) => write!(f, "{s}"),
202 Self::BadInt(k, s) => write!(f, "key `{k}` is not an integer: {s}"),
203 Self::BadPath(s) => write!(f, "{s}"),
204 Self::BadCommand(s) => write!(f, "key `command` is not a JSON string array: {s}"),
205 Self::BadOutputB64(s) => write!(f, "key `output_b64` is not valid base64: {s}"),
206 Self::BadKvLine(s) => write!(f, "frontmatter line is not `key: value`: {s}"),
207 }
208 }
209}
210
// Marker impl so `ParseError` can be used wherever a `std::error::Error` is
// expected; the default trait methods are sufficient (no underlying source).
impl std::error::Error for ParseError {}
212
/// Splits an artifact into `(frontmatter, body)`.
///
/// The text must start with a `---` line. The frontmatter runs up to (not
/// including) the next line that consists solely of `---`; the body is
/// everything after that line. The line terminator immediately before the
/// closing fence is not part of the returned frontmatter.
///
/// Accepted in addition to the canonical `\n`-terminated form:
/// * an empty frontmatter (`---` directly followed by `---`),
/// * a closing fence that is the last line of the text without a trailing
///   newline,
/// * `\r\n` line endings on the fence lines.
///
/// Returns `None` when the opening or closing fence is missing.
fn split_frontmatter(text: &str) -> Option<(&str, &str)> {
    let rest = text
        .strip_prefix("---\n")
        .or_else(|| text.strip_prefix("---\r\n"))?;

    // Byte offset in `rest` of the line currently being inspected.
    let mut offset = 0;
    for line in rest.split_inclusive('\n') {
        let content = line.strip_suffix('\n').unwrap_or(line);
        let content = content.strip_suffix('\r').unwrap_or(content);
        if content == "---" {
            // Drop the terminator of the last frontmatter line so callers get
            // the same slice regardless of the line-ending convention.
            let front = &rest[..offset];
            let front = front.strip_suffix('\n').unwrap_or(front);
            let front = front.strip_suffix('\r').unwrap_or(front);
            let body = &rest[offset + line.len()..];
            return Some((front, body));
        }
        offset += line.len();
    }
    None
}
220
221fn parse_kv_lines(s: &str) -> Result<BTreeMap<String, String>, ParseError> {
222 let mut out = BTreeMap::new();
223 for line in s.lines() {
224 if line.trim().is_empty() {
225 continue;
226 }
227 let (k, v) = line
228 .split_once(": ")
229 .or_else(|| line.split_once(':').map(|(k, v)| (k, v.trim_start())))
230 .ok_or_else(|| ParseError::BadKvLine(line.to_string()))?;
231 out.insert(k.trim().to_string(), v.to_string());
232 }
233 Ok(out)
234}
235
/// Encodes `items` as a compact single-line JSON array of strings.
///
/// Quotes and backslashes are backslash-escaped, `\n`, `\r` and `\t` use their
/// short escapes, and any other character below U+0020 is written as a
/// lowercase `\uXXXX` escape. Everything else is emitted unchanged, so the
/// result never contains a raw line break.
fn encode_string_array(items: &[String]) -> String {
    /// Returns `item` as one double-quoted, escaped JSON string.
    fn quoted(item: &str) -> String {
        use std::fmt::Write as _;

        let mut literal = String::with_capacity(item.len() + 2);
        literal.push('"');
        for ch in item.chars() {
            match ch {
                '"' | '\\' => {
                    literal.push('\\');
                    literal.push(ch);
                }
                '\n' => literal.push_str("\\n"),
                '\r' => literal.push_str("\\r"),
                '\t' => literal.push_str("\\t"),
                _ if u32::from(ch) < 0x20 => {
                    write!(literal, "\\u{:04x}", u32::from(ch)).unwrap();
                }
                _ => literal.push(ch),
            }
        }
        literal.push('"');
        literal
    }

    let elements: Vec<String> = items.iter().map(|item| quoted(item)).collect();
    format!("[{}]", elements.join(","))
}
265
/// Decodes a JSON array of strings such as `["grep","-rn"]`.
///
/// Surrounding whitespace and whitespace between elements is ignored. All
/// JSON string escapes are understood: `\"`, `\\`, `\/`, `\b`, `\f`, `\n`,
/// `\r`, `\t` and `\uXXXX`, including UTF-16 surrogate pairs
/// (`\ud83d\ude00`) as emitted by ASCII-only JSON writers.
///
/// # Errors
///
/// Returns a human-readable message when the input is not wrapped in
/// `[...]`, an element is not a double-quoted string, elements are not
/// separated by a single `,` (a trailing comma is rejected), a string or
/// escape is unterminated, or a `\u` escape is malformed or names a lone
/// surrogate.
fn decode_string_array(s: &str) -> Result<Vec<String>, String> {
    let s = s.trim();
    let inner = s
        .strip_prefix('[')
        .and_then(|s| s.strip_suffix(']'))
        .ok_or_else(|| format!("expected `[...]`, got `{s}`"))?;

    let mut chars = inner.chars().peekable();
    let mut out = Vec::new();

    skip_array_whitespace(&mut chars);
    if chars.peek().is_none() {
        return Ok(out);
    }

    loop {
        // An element is mandatory here: either it is the first one, or a
        // comma has just been consumed.
        skip_array_whitespace(&mut chars);
        match chars.next() {
            Some('"') => out.push(decode_string_literal(&mut chars)?),
            Some(c) => return Err(format!("expected `\"`, got `{c}`")),
            None => return Err("expected `\"` after `,`, got end of array".into()),
        }

        skip_array_whitespace(&mut chars);
        match chars.next() {
            None => return Ok(out),
            Some(',') => {}
            Some(c) => return Err(format!("expected `,` or `]`, got `{c}`")),
        }
    }
}

/// Advances `chars` past any run of whitespace.
fn skip_array_whitespace(chars: &mut std::iter::Peekable<std::str::Chars<'_>>) {
    while chars.next_if(|c| c.is_whitespace()).is_some() {}
}

/// Reads the remainder of a JSON string whose opening quote was already
/// consumed, resolving escapes, and consumes the closing quote.
fn decode_string_literal(
    chars: &mut std::iter::Peekable<std::str::Chars<'_>>,
) -> Result<String, String> {
    let mut buf = String::new();
    loop {
        match chars.next() {
            Some('"') => return Ok(buf),
            Some('\\') => match chars.next() {
                Some('"') => buf.push('"'),
                Some('\\') => buf.push('\\'),
                Some('/') => buf.push('/'),
                Some('b') => buf.push('\u{0008}'),
                Some('f') => buf.push('\u{000c}'),
                Some('n') => buf.push('\n'),
                Some('r') => buf.push('\r'),
                Some('t') => buf.push('\t'),
                Some('u') => buf.push(decode_unicode_escape(chars)?),
                Some(c) => return Err(format!("unknown escape `\\{c}`")),
                None => return Err("unterminated escape".into()),
            },
            Some(c) => buf.push(c),
            None => return Err("unterminated string".into()),
        }
    }
}

/// Decodes the payload of a `\u` escape (the `\u` itself already consumed),
/// pulling in the second half of a surrogate pair when the first is a high
/// surrogate.
fn decode_unicode_escape(
    chars: &mut std::iter::Peekable<std::str::Chars<'_>>,
) -> Result<char, String> {
    let code = read_hex4(chars)?;
    let scalar = if (0xD800..=0xDBFF).contains(&code) {
        // Characters outside the BMP are written as two consecutive escapes.
        match (chars.next(), chars.next()) {
            (Some('\\'), Some('u')) => {}
            _ => {
                return Err(format!(
                    "high surrogate {code:#x} is not followed by a `\\u` escape"
                ))
            }
        }
        let low = read_hex4(chars)?;
        if !(0xDC00..=0xDFFF).contains(&low) {
            return Err(format!(
                "high surrogate {code:#x} is followed by {low:#x}, not a low surrogate"
            ));
        }
        0x10000 + ((code - 0xD800) << 10) + (low - 0xDC00)
    } else {
        code
    };
    // Still fails for a lone low surrogate, which is not a valid scalar value.
    char::from_u32(scalar).ok_or_else(|| format!("invalid unicode {scalar:#x}"))
}

/// Reads exactly four hexadecimal digits and returns their value.
fn read_hex4(chars: &mut std::iter::Peekable<std::str::Chars<'_>>) -> Result<u32, String> {
    let mut code = 0u32;
    for _ in 0..4 {
        let c = chars.next().ok_or("unterminated \\u escape")?;
        let digit = c
            .to_digit(16)
            .ok_or_else(|| format!("bad \\u escape: `{c}` is not a hex digit"))?;
        code = code * 16 + digit;
    }
    Ok(code)
}
340
/// The standard base64 alphabet, indexed by 6-bit value.
const B64_ALPHABET: &[u8; 64] = b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

/// Encodes `bytes` as standard base64 with `=` padding and no line breaks.
///
/// Empty input yields an empty string.
pub(crate) fn b64_encode(bytes: &[u8]) -> String {
    let mut encoded = String::with_capacity(bytes.len().div_ceil(3) * 4);
    for group in bytes.chunks(3) {
        // Pack up to three bytes into the top of a 24-bit word; absent bytes
        // of a short final group stay zero.
        let word = group
            .iter()
            .enumerate()
            .fold(0u32, |acc, (pos, &byte)| {
                acc | (u32::from(byte) << (16 - 8 * pos))
            });

        // n input bytes produce n + 1 symbols; the rest of the quad is padding.
        let symbols = group.len() + 1;
        for slot in 0..4 {
            if slot < symbols {
                let index = (word >> (18 - 6 * slot)) & 63;
                encoded.push(B64_ALPHABET[index as usize] as char);
            } else {
                encoded.push('=');
            }
        }
    }
    encoded
}
374
/// Decodes standard, `=`-padded base64.
///
/// Whitespace anywhere in the input is ignored; empty input yields an empty
/// vector.
///
/// # Errors
///
/// Returns a message when, after removing whitespace, the input
/// * contains a character outside the base64 alphabet,
/// * has a length that is not a multiple of four, or
/// * has misplaced padding: `=` is only accepted as the last one or two
///   characters of the final four-character group.
pub(crate) fn b64_decode(s: &str) -> Result<Vec<u8>, String> {
    /// Maps one alphabet character to its 6-bit value.
    fn val(c: u8) -> Result<u8, String> {
        Ok(match c {
            b'A'..=b'Z' => c - b'A',
            b'a'..=b'z' => c - b'a' + 26,
            b'0'..=b'9' => c - b'0' + 52,
            b'+' => 62,
            b'/' => 63,
            _ => return Err(format!("bad base64 char `{}`", c as char)),
        })
    }

    // Reject non-ASCII up front so the error names the real character and the
    // length check below counts symbols rather than UTF-8 bytes.
    let mut symbols = Vec::with_capacity(s.len());
    for c in s.chars().filter(|c| !c.is_whitespace()) {
        if !c.is_ascii() {
            return Err(format!("bad base64 char `{c}`"));
        }
        // Lossless: `c` was just checked to be ASCII.
        symbols.push(c as u8);
    }
    if symbols.is_empty() {
        return Ok(Vec::new());
    }
    if symbols.len() % 4 != 0 {
        return Err(format!(
            "base64 length {} not multiple of 4",
            symbols.len()
        ));
    }

    let quad_count = symbols.len() / 4;
    let mut out = Vec::with_capacity(quad_count * 3);
    for (index, quad) in symbols.chunks_exact(4).enumerate() {
        let pad = quad.iter().rev().take_while(|&&b| b == b'=').count();
        if pad > 2 {
            return Err(format!(
                "base64 group has {pad} padding chars; at most 2 are allowed"
            ));
        }
        if pad > 0 && index + 1 != quad_count {
            return Err("base64 padding is only allowed at the end of the input".to_string());
        }

        // Any `=` left among the data characters is rejected by `val`.
        let mut n = 0u32;
        for &c in &quad[..4 - pad] {
            n = (n << 6) | u32::from(val(c)?);
        }
        // Align a short final group to the full 24-bit layout.
        for _ in 0..pad {
            n <<= 6;
        }

        out.push(((n >> 16) & 0xff) as u8);
        if pad < 2 {
            out.push(((n >> 8) & 0xff) as u8);
        }
        if pad < 1 {
            out.push((n & 0xff) as u8);
        }
    }
    Ok(out)
}
419
/// Joins `args` into a single command line that a POSIX shell would split
/// back into the same arguments.
///
/// An argument is emitted verbatim only when it is non-empty and consists
/// solely of characters that have no special meaning to the shell (ASCII
/// letters, digits and `_ @ % + = : , . / -`). Anything else — including
/// whitespace, quotes, `$`, backticks, `;`, `|`, `&`, globs and redirections —
/// is wrapped in single quotes, with embedded single quotes written as
/// `'\''`. Arguments are separated by one space.
fn shell_quote(args: &[String]) -> String {
    /// Characters that never need quoting.
    fn is_shell_safe(c: char) -> bool {
        c.is_ascii_alphanumeric()
            || matches!(c, '_' | '@' | '%' | '+' | '=' | ':' | ',' | '.' | '/' | '-')
    }

    args.iter()
        .map(|arg| {
            if !arg.is_empty() && arg.chars().all(is_shell_safe) {
                arg.clone()
            } else {
                // Close the quote, emit an escaped `'`, reopen the quote.
                format!("'{}'", arg.replace('\'', "'\\''"))
            }
        })
        .collect::<Vec<_>>()
        .join(" ")
}
436
/// Prefixes every line of `s` with `prefix` and joins the lines with `\n`.
///
/// Line terminators (`\n` or `\r\n`) are normalized to `\n`, and the result
/// has no trailing newline. Empty input yields an empty string.
fn indent_block(s: &str, prefix: &str) -> String {
    let mut indented = String::new();
    for (index, line) in s.lines().enumerate() {
        if index > 0 {
            indented.push('\n');
        }
        indented.push_str(prefix);
        indented.push_str(line);
    }
    indented
}
443
// Unit tests: render/parse round trips, rejection of malformed artifacts, and
// round trips of the base64 and string-array codecs.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::kind::ReviewerKind;

    // Baseline record with an empty output; individual tests tweak fields.
    fn sample() -> ArtifactRecord {
        ArtifactRecord {
            path: ArtifactPath::new(1, ReviewerKind::Concept, "no-stale-refs").unwrap(),
            reviewer: "agent:concept-bot".into(),
            timestamp: "2026-05-07T14:32:01Z".into(),
            commit: Some("a3f8c12".into()),
            command: vec![
                "grep".into(),
                "-rn".into(),
                "old_function_name".into(),
                "crates/".into(),
            ],
            exit_code: 1,
            output: String::new(),
            hash: "sha256:abcdef".into(),
        }
    }

    // A record with multi-line output survives render -> parse unchanged.
    #[test]
    fn parses_back_what_we_render() {
        let mut r = sample();
        r.output = "alpha\nbeta\n".into();
        let s = r.render();
        let parsed = ArtifactRecord::parse(&s).unwrap();
        assert_eq!(parsed, r);
    }

    // Empty output renders as an empty `output_b64` value and parses back as empty.
    #[test]
    fn parses_back_empty_output() {
        let r = sample();
        let parsed = ArtifactRecord::parse(&r.render()).unwrap();
        assert_eq!(parsed, r);
    }

    // Covers every padding length (0, 1 and 2 `=`) plus non-UTF-8 bytes.
    #[test]
    fn b64_round_trip() {
        let cases: &[&[u8]] = &[
            b"",
            b"a",
            b"ab",
            b"abc",
            b"abcd",
            b"hello world\n",
            &[0x00, 0xff, b'b', b'i', b'n'],
        ];
        for bytes in cases {
            let enc = b64_encode(bytes);
            let dec = b64_decode(&enc).unwrap();
            assert_eq!(&dec[..], *bytes, "round trip failed for {bytes:?}");
        }
    }

    // Text that does not start with a `---` fence is rejected outright.
    #[test]
    fn rejects_missing_frontmatter() {
        assert_eq!(
            ArtifactRecord::parse("# hello"),
            Err(ParseError::MissingFrontmatter)
        );
    }

    // The schema tag is checked before any other key is required.
    #[test]
    fn rejects_unknown_schema() {
        let s = "---\nschema: not-koala/v9\n---\n\n# x\n";
        assert!(matches!(
            ArtifactRecord::parse(s),
            Err(ParseError::UnknownSchema(_))
        ));
    }

    // Quotes, backslashes, tabs and empty strings survive encode -> decode.
    #[test]
    fn json_array_round_trip_with_specials() {
        let v = vec![
            "grep".to_string(),
            "with \"quote\" and \\back".to_string(),
            "tab\there".to_string(),
            "".to_string(),
        ];
        let s = encode_string_array(&v);
        let back = decode_string_array(&s).unwrap();
        assert_eq!(back, v);
    }
}