1use std::path::Path;
22use std::process::Command;
23
24use rusqlite::{params, Connection};
25use serde::{Deserialize, Serialize};
26use thiserror::Error;
27
28#[derive(Debug, Error)]
31pub enum VerifyError {
32 #[error("database error: {0}")]
33 Database(String),
34 #[error("serialization error: {0}")]
35 Serialization(String),
36 #[error("claimcheck error: {0}")]
37 ClaimCheck(String),
38}
39
40impl From<rusqlite::Error> for VerifyError {
41 fn from(e: rusqlite::Error) -> Self {
42 VerifyError::Database(e.to_string())
43 }
44}
45
46impl From<serde_json::Error> for VerifyError {
47 fn from(e: serde_json::Error) -> Self {
48 VerifyError::Serialization(e.to_string())
49 }
50}
51
/// Reports whether a `claimcheck` executable can be launched.
///
/// Runs `claimcheck --help` and returns `true` whenever the process could be
/// started and waited on; its exit status and output are not examined.
pub fn claimcheck_available() -> bool {
    let probe = Command::new("claimcheck").arg("--help").output();
    probe.is_ok()
}
58
59#[derive(Debug, Clone, Serialize, Deserialize)]
61pub struct ClaimCheckReport {
62 pub truth_score: String,
63 pub summary: ClaimCheckSummary,
64 pub claims: Vec<ClaimCheckClaim>,
65}
66
67#[derive(Debug, Clone, Serialize, Deserialize)]
68pub struct ClaimCheckSummary {
69 pub total: u32,
70 pub pass: u32,
71 pub fail: u32,
72 pub unverifiable: u32,
73}
74
75#[derive(Debug, Clone, Serialize, Deserialize)]
76pub struct ClaimCheckClaim {
77 pub claim_type: String,
78 pub raw_text: String,
79 pub result: String, pub reason: Option<String>,
81}
82
83pub fn run_claimcheck(
86 transcript_path: &Path,
87 project_dir: &Path,
88 baseline: Option<&str>,
89 retest: bool,
90 test_cmd: Option<&str>,
91) -> Result<ClaimCheckReport, VerifyError> {
92 let mut cmd = Command::new("claimcheck");
93 cmd.arg(transcript_path)
94 .arg("--json")
95 .arg("--project-dir")
96 .arg(project_dir);
97
98 if let Some(b) = baseline {
99 cmd.arg("--baseline").arg(b);
100 }
101 if retest {
102 cmd.arg("--retest");
103 if let Some(tc) = test_cmd {
104 cmd.arg("--test-cmd").arg(tc);
105 }
106 }
107
108 let output = cmd.output().map_err(|e| VerifyError::ClaimCheck(e.to_string()))?;
109
110 let stdout = String::from_utf8_lossy(&output.stdout);
112 if stdout.trim().is_empty() {
113 let stderr = String::from_utf8_lossy(&output.stderr);
114 return Err(VerifyError::ClaimCheck(format!(
115 "claimcheck produced no output. stderr: {stderr}"
116 )));
117 }
118
119 serde_json::from_str(&stdout).map_err(|e| {
120 VerifyError::ClaimCheck(format!("failed to parse claimcheck JSON: {e}\noutput: {stdout}"))
121 })
122}
123
124#[derive(Debug, Clone, Serialize, Deserialize)]
127pub struct AgentClaim {
128 pub action_type: String,
129 pub resource: String,
130 pub claimed_at: String,
131}
132
133#[derive(Debug, Clone, Serialize, Deserialize)]
134pub struct SessionAction {
135 pub step_number: i64,
136 pub timestamp: String,
137 pub action_type: String,
138 pub agent_pid: i64,
139 pub payload: String,
140}
141
142#[derive(Debug, Clone, Serialize, Deserialize)]
143pub struct SessionEvidence {
144 pub session_id: String,
145 pub actions: Vec<SessionAction>,
146}
147
148#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
149#[serde(tag = "verdict", content = "reason")]
150pub enum ClaimVerdict {
151 Pass,
152 Fail,
153 Inconclusive { reason: String },
154}
155
156#[derive(Debug, Clone, Serialize, Deserialize)]
157pub struct ClaimResult {
158 pub claim: AgentClaim,
159 pub verdict: ClaimVerdict,
160 pub discrepancies: Vec<String>,
161}
162
163#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
164pub enum VerificationStatus {
165 Verified,
166 Unverified,
167 Inconclusive,
168}
169
170impl std::fmt::Display for VerificationStatus {
171 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
172 match self {
173 VerificationStatus::Verified => write!(f, "Verified"),
174 VerificationStatus::Unverified => write!(f, "Unverified"),
175 VerificationStatus::Inconclusive => write!(f, "Inconclusive"),
176 }
177 }
178}
179
180#[derive(Debug, Clone, Serialize, Deserialize)]
182pub struct VerificationReport {
183 pub session_id: String,
184 pub overall_status: VerificationStatus,
185 pub claims: Vec<ClaimResult>,
186 pub truth_score: Option<String>,
188 pub claimcheck_raw: Option<ClaimCheckReport>,
190}
191
192pub struct VerificationEngine {
195 pub db: Connection,
196}
197
198impl VerificationEngine {
199 pub fn new(db: Connection) -> Self {
200 Self { db }
201 }
202
203 pub fn verify_with_claimcheck(
211 &self,
212 session_id: &str,
213 transcript_path: &Path,
214 project_dir: &Path,
215 baseline: Option<&str>,
216 retest: bool,
217 test_cmd: Option<&str>,
218 ) -> Result<VerificationReport, VerifyError> {
219 let cc = run_claimcheck(transcript_path, project_dir, baseline, retest, test_cmd)?;
220
221 let claims: Vec<ClaimResult> = cc.claims.iter().map(|c| {
223 let verdict = match c.result.as_str() {
224 "PASS" => ClaimVerdict::Pass,
225 "FAIL" => ClaimVerdict::Fail,
226 _ => ClaimVerdict::Inconclusive {
227 reason: c.reason.clone().unwrap_or_else(|| "unverifiable".to_string()),
228 },
229 };
230 let discrepancies = if let Some(ref r) = c.reason {
231 vec![r.clone()]
232 } else {
233 vec![]
234 };
235 ClaimResult {
236 claim: AgentClaim {
237 action_type: c.claim_type.clone(),
238 resource: c.raw_text.clone(),
239 claimed_at: String::new(),
240 },
241 verdict,
242 discrepancies,
243 }
244 }).collect();
245
246 let overall_status = if cc.summary.fail > 0 {
247 VerificationStatus::Unverified
248 } else if cc.summary.pass > 0 {
249 VerificationStatus::Verified
250 } else {
251 VerificationStatus::Inconclusive
252 };
253
254 let report = VerificationReport {
255 session_id: session_id.to_string(),
256 overall_status,
257 claims,
258 truth_score: Some(cc.truth_score.clone()),
259 claimcheck_raw: Some(cc),
260 };
261
262 self.store_report_full(&report)?;
263 Ok(report)
264 }
265
266 pub fn verify_session(
268 &self,
269 session_id: &str,
270 claims: Vec<AgentClaim>,
271 ) -> Result<VerificationReport, VerifyError> {
272 let actions = self.load_actions(session_id)?;
273 let evidence = SessionEvidence { session_id: session_id.to_string(), actions };
274
275 let mut results = Vec::with_capacity(claims.len());
276 for claim in claims {
277 let verdict = self.verify_claim(&claim, &evidence);
278 let discrepancies = match &verdict {
279 ClaimVerdict::Fail => vec![format!(
280 "no recorded {} action for resource '{}'",
281 claim.action_type, claim.resource
282 )],
283 _ => vec![],
284 };
285 results.push(ClaimResult { claim, verdict, discrepancies });
286 }
287
288 let overall_status = derive_status(&results);
289 let report = VerificationReport {
290 session_id: session_id.to_string(),
291 overall_status,
292 claims: results,
293 truth_score: None,
294 claimcheck_raw: None,
295 };
296
297 self.store_report_full(&report)?;
298 Ok(report)
299 }
300
301 pub fn verify_claim(&self, claim: &AgentClaim, evidence: &SessionEvidence) -> ClaimVerdict {
302 if evidence.actions.is_empty() {
303 return ClaimVerdict::Inconclusive { reason: "no evidence".to_string() };
304 }
305 let matched = evidence.actions.iter().any(|a| {
306 a.action_type == claim.action_type && action_matches_resource(a, &claim.resource)
307 });
308 if matched { ClaimVerdict::Pass } else { ClaimVerdict::Fail }
309 }
310
311 fn load_actions(&self, session_id: &str) -> Result<Vec<SessionAction>, VerifyError> {
312 let mut stmt = self.db.prepare(
313 "SELECT step_number, timestamp, action_type, agent_pid, payload \
314 FROM session_actions WHERE session_id = ?1 ORDER BY step_number ASC",
315 )?;
316 let rows = stmt.query_map(params![session_id], |row| {
317 Ok(SessionAction {
318 step_number: row.get(0)?,
319 timestamp: row.get(1)?,
320 action_type: row.get(2)?,
321 agent_pid: row.get(3)?,
322 payload: row.get(4)?,
323 })
324 })?;
325 let mut actions = Vec::new();
326 for row in rows {
327 actions.push(row?);
328 }
329 Ok(actions)
330 }
331
332 fn store_report_full(&self, report: &VerificationReport) -> Result<(), VerifyError> {
333 let json = serde_json::to_string(report)?;
334 self.db.execute(
335 "INSERT INTO verification_reports (session_id, timestamp, overall_status, report_json) \
336 VALUES (?1, datetime('now'), ?2, ?3)",
337 params![report.session_id, report.overall_status.to_string(), json],
338 )?;
339 Ok(())
340 }
341}
342
343fn action_matches_resource(action: &SessionAction, resource: &str) -> bool {
344 if let Ok(v) = serde_json::from_str::<serde_json::Value>(&action.payload) {
345 for key in &["resource", "path", "url", "command"] {
346 if let Some(val) = v.get(key).and_then(|x| x.as_str()) {
347 if val == resource {
348 return true;
349 }
350 }
351 }
352 }
353 false
354}
355
356fn derive_status(results: &[ClaimResult]) -> VerificationStatus {
357 if results.is_empty() {
358 return VerificationStatus::Inconclusive;
359 }
360 if results.iter().any(|r| r.verdict == ClaimVerdict::Fail) {
361 return VerificationStatus::Unverified;
362 }
363 if results.iter().all(|r| r.verdict == ClaimVerdict::Pass) {
364 VerificationStatus::Verified
365 } else {
366 VerificationStatus::Inconclusive
367 }
368}
369
#[cfg(test)]
mod tests {
    use super::*;
    use rusqlite::Connection;

    /// Tables the engine reads from and writes to, created fresh for each test.
    const SCHEMA: &str = "
        CREATE TABLE IF NOT EXISTS sessions (
            id TEXT PRIMARY KEY, started_at TEXT NOT NULL, ended_at TEXT,
            status TEXT NOT NULL DEFAULT 'Active'
        );
        CREATE TABLE IF NOT EXISTS session_actions (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            session_id TEXT NOT NULL REFERENCES sessions(id),
            step_number INTEGER NOT NULL,
            timestamp TEXT NOT NULL,
            action_type TEXT NOT NULL,
            agent_pid INTEGER NOT NULL,
            payload TEXT NOT NULL,
            UNIQUE(session_id, step_number)
        );
        CREATE TABLE IF NOT EXISTS verification_reports (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            session_id TEXT NOT NULL REFERENCES sessions(id),
            timestamp TEXT NOT NULL,
            overall_status TEXT NOT NULL,
            report_json TEXT NOT NULL
        );
    ";

    /// Timestamp shared by every fixture claim and action.
    const FIXTURE_TIME: &str = "2024-01-01T00:00:00Z";

    /// Opens an in-memory database with the schema applied.
    fn in_memory_db() -> Connection {
        let conn = Connection::open_in_memory().unwrap();
        conn.execute_batch(SCHEMA).unwrap();
        conn
    }

    /// Inserts a session row with the given id.
    fn insert_session(conn: &Connection, id: &str) {
        conn.execute(
            "INSERT INTO sessions (id, started_at) VALUES (?1, datetime('now'))",
            params![id],
        )
        .unwrap();
    }

    /// Inserts one recorded action (agent pid 1) for a session.
    fn insert_action(
        conn: &Connection,
        session_id: &str,
        step: i64,
        action_type: &str,
        payload: &str,
    ) {
        conn.execute(
            "INSERT INTO session_actions \
             (session_id, step_number, timestamp, action_type, agent_pid, payload) \
             VALUES (?1, ?2, datetime('now'), ?3, 1, ?4)",
            params![session_id, step, action_type, payload],
        )
        .unwrap();
    }

    /// Builds a claim stamped with the fixture time.
    fn claim_for(action_type: &str, resource: &str) -> AgentClaim {
        AgentClaim {
            action_type: action_type.into(),
            resource: resource.into(),
            claimed_at: FIXTURE_TIME.into(),
        }
    }

    /// Builds evidence for session `s1` holding exactly one action.
    fn evidence_with_one_action(action_type: &str, payload: &str) -> SessionEvidence {
        SessionEvidence {
            session_id: "s1".into(),
            actions: vec![SessionAction {
                step_number: 1,
                timestamp: FIXTURE_TIME.into(),
                action_type: action_type.into(),
                agent_pid: 42,
                payload: payload.into(),
            }],
        }
    }

    #[test]
    fn claimcheck_available_check_does_not_panic() {
        let _ = claimcheck_available();
    }

    #[test]
    fn run_claimcheck_on_nonexistent_transcript_returns_error() {
        // Only meaningful when the tool is installed.
        if !claimcheck_available() {
            return;
        }
        let outcome = run_claimcheck(
            Path::new("/tmp/nonexistent-hawk-transcript.jsonl"),
            Path::new("."),
            None,
            false,
            None,
        );
        assert!(outcome.is_err());
    }

    #[test]
    fn run_claimcheck_with_real_transcript_when_installed() {
        if !claimcheck_available() {
            return;
        }
        let workdir = tempfile::TempDir::new().unwrap();
        let transcript = workdir.path().join("session.jsonl");
        std::fs::write(
            &transcript,
            r#"{"role":"user","content":"Create a file"}
{"role":"assistant","content":"I created /tmp/hawk-test-claimcheck.txt with the content."}
"#,
        )
        .unwrap();

        match run_claimcheck(&transcript, workdir.path(), None, false, None) {
            Ok(report) => assert!(!report.truth_score.is_empty()),
            // A claimcheck-level failure is tolerated in this test.
            Err(VerifyError::ClaimCheck(_)) => {}
            Err(e) => panic!("unexpected error: {e}"),
        }
    }

    #[test]
    fn inconclusive_when_no_evidence() {
        let engine = VerificationEngine::new(in_memory_db());
        let claim = claim_for("file_write", "/tmp/out.txt");
        let evidence = SessionEvidence { session_id: "s1".into(), actions: vec![] };
        assert_eq!(
            engine.verify_claim(&claim, &evidence),
            ClaimVerdict::Inconclusive { reason: "no evidence".into() }
        );
    }

    #[test]
    fn pass_when_matching_action_exists() {
        let engine = VerificationEngine::new(in_memory_db());
        let claim = claim_for("file_write", "/tmp/out.txt");
        let evidence = evidence_with_one_action("file_write", r#"{"path":"/tmp/out.txt"}"#);
        assert_eq!(engine.verify_claim(&claim, &evidence), ClaimVerdict::Pass);
    }

    #[test]
    fn fail_when_no_matching_action() {
        let engine = VerificationEngine::new(in_memory_db());
        let claim = claim_for("api_call", "https://api.openai.com/v1/chat");
        let evidence = evidence_with_one_action("file_write", r#"{"path":"/tmp/out.txt"}"#);
        assert_eq!(engine.verify_claim(&claim, &evidence), ClaimVerdict::Fail);
    }

    #[test]
    fn verify_session_all_pass_returns_verified() {
        let conn = in_memory_db();
        insert_session(&conn, "sess-pass");
        insert_action(&conn, "sess-pass", 1, "file_write", r#"{"path":"/tmp/result.txt"}"#);
        let engine = VerificationEngine::new(conn);

        let claims = vec![claim_for("file_write", "/tmp/result.txt")];
        let report = engine.verify_session("sess-pass", claims).unwrap();

        assert_eq!(report.overall_status, VerificationStatus::Verified);
        assert_eq!(report.claims[0].verdict, ClaimVerdict::Pass);
        assert!(report.truth_score.is_none());
    }

    #[test]
    fn verify_session_any_fail_returns_unverified() {
        let conn = in_memory_db();
        insert_session(&conn, "sess-fail");
        insert_action(&conn, "sess-fail", 1, "file_write", r#"{"path":"/tmp/result.txt"}"#);
        let engine = VerificationEngine::new(conn);

        let claims = vec![
            claim_for("file_write", "/tmp/result.txt"),
            claim_for("api_call", "https://api.openai.com/v1/chat"),
        ];
        let report = engine.verify_session("sess-fail", claims).unwrap();

        assert_eq!(report.overall_status, VerificationStatus::Unverified);
    }

    #[test]
    fn verify_session_no_actions_returns_inconclusive() {
        let conn = in_memory_db();
        insert_session(&conn, "sess-inc");
        let engine = VerificationEngine::new(conn);

        let claims = vec![claim_for("file_write", "/tmp/out.txt")];
        let report = engine.verify_session("sess-inc", claims).unwrap();

        assert_eq!(report.overall_status, VerificationStatus::Inconclusive);
    }

    #[test]
    fn verify_session_stores_report_in_sqlite() {
        let conn = in_memory_db();
        insert_session(&conn, "sess-store");
        insert_action(&conn, "sess-store", 1, "file_write", r#"{"path":"/tmp/x.txt"}"#);
        let engine = VerificationEngine::new(conn);

        let claims = vec![claim_for("file_write", "/tmp/x.txt")];
        engine.verify_session("sess-store", claims).unwrap();

        let stored: i64 = engine
            .db
            .query_row(
                "SELECT COUNT(*) FROM verification_reports WHERE session_id = 'sess-store'",
                [],
                |row| row.get(0),
            )
            .unwrap();
        assert_eq!(stored, 1);
    }

    #[test]
    fn verify_session_empty_claims_returns_inconclusive() {
        let conn = in_memory_db();
        insert_session(&conn, "sess-empty");
        let engine = VerificationEngine::new(conn);

        let report = engine.verify_session("sess-empty", vec![]).unwrap();

        assert_eq!(report.overall_status, VerificationStatus::Inconclusive);
    }

    #[test]
    fn discrepancies_populated_on_fail() {
        let conn = in_memory_db();
        insert_session(&conn, "sess-disc");
        insert_action(&conn, "sess-disc", 1, "file_write", r#"{"path":"/tmp/other.txt"}"#);
        let engine = VerificationEngine::new(conn);

        let claims = vec![claim_for("file_write", "/tmp/missing.txt")];
        let report = engine.verify_session("sess-disc", claims).unwrap();

        assert_eq!(report.claims[0].verdict, ClaimVerdict::Fail);
        assert!(!report.claims[0].discrepancies.is_empty());
    }

    #[test]
    fn api_call_matched_via_url_field() {
        let engine = VerificationEngine::new(in_memory_db());
        let claim = claim_for("api_call", "https://api.openai.com/v1/chat");
        let evidence =
            evidence_with_one_action("api_call", r#"{"url":"https://api.openai.com/v1/chat"}"#);
        assert_eq!(engine.verify_claim(&claim, &evidence), ClaimVerdict::Pass);
    }

    #[test]
    fn command_exec_matched_via_command_field() {
        let engine = VerificationEngine::new(in_memory_db());
        let claim = claim_for("command_exec", "python3");
        let evidence = evidence_with_one_action("command_exec", r#"{"command":"python3"}"#);
        assert_eq!(engine.verify_claim(&claim, &evidence), ClaimVerdict::Pass);
    }
}