openhawk_verify/
lib.rs

1// hawk-verify: ClaimCheck bridge
2//
3// claimcheck: https://github.com/ojuschugh1/claimcheck
4//
5// claimcheck <transcript.jsonl> --json --project-dir <path>
6//
7// JSON output:
8// {
9//   "truth_score": "67%",
10//   "summary": { "total": 4, "pass": 2, "fail": 1, "unverifiable": 1 },
11//   "claims": [
12//     { "claim_type": "File", "raw_text": "created src/auth.ts", "result": "PASS", "reason": null },
13//     { "claim_type": "Package", "raw_text": "installed jsonwebtoken", "result": "FAIL",
14//       "reason": "package not found in any lockfile" }
15//   ]
16// }
17//
18// Fallback: when claimcheck is not installed, the engine checks session_actions
19// in SQLite (the original behaviour).
20
21use std::path::Path;
22use std::process::Command;
23
24use rusqlite::{params, Connection};
25use serde::{Deserialize, Serialize};
26use thiserror::Error;
27
28// ── Error ─────────────────────────────────────────────────────────────────────
29
30#[derive(Debug, Error)]
31pub enum VerifyError {
32    #[error("database error: {0}")]
33    Database(String),
34    #[error("serialization error: {0}")]
35    Serialization(String),
36    #[error("claimcheck error: {0}")]
37    ClaimCheck(String),
38}
39
40impl From<rusqlite::Error> for VerifyError {
41    fn from(e: rusqlite::Error) -> Self {
42        VerifyError::Database(e.to_string())
43    }
44}
45
46impl From<serde_json::Error> for VerifyError {
47    fn from(e: serde_json::Error) -> Self {
48        VerifyError::Serialization(e.to_string())
49    }
50}
51
52// ── claimcheck binary bridge ──────────────────────────────────────────────────
53
/// Probes for a runnable `claimcheck` binary.
///
/// Runs `claimcheck --help` and reports whether the process could be started
/// and waited on. The exit status and any output of that probe are ignored,
/// so `true` means "the program was found and ran", not "it ran successfully".
pub fn claimcheck_available() -> bool {
    let mut probe = Command::new("claimcheck");
    probe.arg("--help");
    let outcome = probe.output();
    outcome.is_ok()
}
58
59/// Raw JSON output from `claimcheck --json`.
60#[derive(Debug, Clone, Serialize, Deserialize)]
61pub struct ClaimCheckReport {
62    pub truth_score: String,
63    pub summary: ClaimCheckSummary,
64    pub claims: Vec<ClaimCheckClaim>,
65}
66
67#[derive(Debug, Clone, Serialize, Deserialize)]
68pub struct ClaimCheckSummary {
69    pub total: u32,
70    pub pass: u32,
71    pub fail: u32,
72    pub unverifiable: u32,
73}
74
75#[derive(Debug, Clone, Serialize, Deserialize)]
76pub struct ClaimCheckClaim {
77    pub claim_type: String,
78    pub raw_text: String,
79    pub result: String, // "PASS", "FAIL", "UNVERIFIABLE"
80    pub reason: Option<String>,
81}
82
83/// Run `claimcheck <transcript_path> --json --project-dir <project_dir>`.
84/// Optionally pass `--baseline <ref>` and `--retest`.
85pub fn run_claimcheck(
86    transcript_path: &Path,
87    project_dir: &Path,
88    baseline: Option<&str>,
89    retest: bool,
90    test_cmd: Option<&str>,
91) -> Result<ClaimCheckReport, VerifyError> {
92    let mut cmd = Command::new("claimcheck");
93    cmd.arg(transcript_path)
94        .arg("--json")
95        .arg("--project-dir")
96        .arg(project_dir);
97
98    if let Some(b) = baseline {
99        cmd.arg("--baseline").arg(b);
100    }
101    if retest {
102        cmd.arg("--retest");
103        if let Some(tc) = test_cmd {
104            cmd.arg("--test-cmd").arg(tc);
105        }
106    }
107
108    let output = cmd.output().map_err(|e| VerifyError::ClaimCheck(e.to_string()))?;
109
110    // claimcheck exits 0 even when claims fail — parse stdout regardless
111    let stdout = String::from_utf8_lossy(&output.stdout);
112    if stdout.trim().is_empty() {
113        let stderr = String::from_utf8_lossy(&output.stderr);
114        return Err(VerifyError::ClaimCheck(format!(
115            "claimcheck produced no output. stderr: {stderr}"
116        )));
117    }
118
119    serde_json::from_str(&stdout).map_err(|e| {
120        VerifyError::ClaimCheck(format!("failed to parse claimcheck JSON: {e}\noutput: {stdout}"))
121    })
122}
123
124// ── Domain types (shared between claimcheck bridge and fallback engine) ───────
125
126#[derive(Debug, Clone, Serialize, Deserialize)]
127pub struct AgentClaim {
128    pub action_type: String,
129    pub resource: String,
130    pub claimed_at: String,
131}
132
133#[derive(Debug, Clone, Serialize, Deserialize)]
134pub struct SessionAction {
135    pub step_number: i64,
136    pub timestamp: String,
137    pub action_type: String,
138    pub agent_pid: i64,
139    pub payload: String,
140}
141
142#[derive(Debug, Clone, Serialize, Deserialize)]
143pub struct SessionEvidence {
144    pub session_id: String,
145    pub actions: Vec<SessionAction>,
146}
147
148#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
149#[serde(tag = "verdict", content = "reason")]
150pub enum ClaimVerdict {
151    Pass,
152    Fail,
153    Inconclusive { reason: String },
154}
155
156#[derive(Debug, Clone, Serialize, Deserialize)]
157pub struct ClaimResult {
158    pub claim: AgentClaim,
159    pub verdict: ClaimVerdict,
160    pub discrepancies: Vec<String>,
161}
162
163#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
164pub enum VerificationStatus {
165    Verified,
166    Unverified,
167    Inconclusive,
168}
169
170impl std::fmt::Display for VerificationStatus {
171    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
172        match self {
173            VerificationStatus::Verified => write!(f, "Verified"),
174            VerificationStatus::Unverified => write!(f, "Unverified"),
175            VerificationStatus::Inconclusive => write!(f, "Inconclusive"),
176        }
177    }
178}
179
180/// Full verification report — produced either by claimcheck or the fallback engine.
181#[derive(Debug, Clone, Serialize, Deserialize)]
182pub struct VerificationReport {
183    pub session_id: String,
184    pub overall_status: VerificationStatus,
185    pub claims: Vec<ClaimResult>,
186    /// Set when the real claimcheck binary was used.
187    pub truth_score: Option<String>,
188    /// Raw claimcheck output when available.
189    pub claimcheck_raw: Option<ClaimCheckReport>,
190}
191
192// ── VerificationEngine ────────────────────────────────────────────────────────
193
194pub struct VerificationEngine {
195    pub db: Connection,
196}
197
198impl VerificationEngine {
199    pub fn new(db: Connection) -> Self {
200        Self { db }
201    }
202
203    /// Verify a session using the real claimcheck binary when available.
204    ///
205    /// `transcript_path` — path to a `.jsonl` or `.md` transcript file exported
206    ///   from Claude Code, Cursor, or any supported tool.
207    /// `project_dir` — the project root claimcheck should check against.
208    /// `baseline` — git ref for the session window (e.g. "HEAD~3", "main").
209    /// `retest` — re-run tests to verify test claims.
210    pub fn verify_with_claimcheck(
211        &self,
212        session_id: &str,
213        transcript_path: &Path,
214        project_dir: &Path,
215        baseline: Option<&str>,
216        retest: bool,
217        test_cmd: Option<&str>,
218    ) -> Result<VerificationReport, VerifyError> {
219        let cc = run_claimcheck(transcript_path, project_dir, baseline, retest, test_cmd)?;
220
221        // Map claimcheck results into our domain types
222        let claims: Vec<ClaimResult> = cc.claims.iter().map(|c| {
223            let verdict = match c.result.as_str() {
224                "PASS" => ClaimVerdict::Pass,
225                "FAIL" => ClaimVerdict::Fail,
226                _ => ClaimVerdict::Inconclusive {
227                    reason: c.reason.clone().unwrap_or_else(|| "unverifiable".to_string()),
228                },
229            };
230            let discrepancies = if let Some(ref r) = c.reason {
231                vec![r.clone()]
232            } else {
233                vec![]
234            };
235            ClaimResult {
236                claim: AgentClaim {
237                    action_type: c.claim_type.clone(),
238                    resource: c.raw_text.clone(),
239                    claimed_at: String::new(),
240                },
241                verdict,
242                discrepancies,
243            }
244        }).collect();
245
246        let overall_status = if cc.summary.fail > 0 {
247            VerificationStatus::Unverified
248        } else if cc.summary.pass > 0 {
249            VerificationStatus::Verified
250        } else {
251            VerificationStatus::Inconclusive
252        };
253
254        let report = VerificationReport {
255            session_id: session_id.to_string(),
256            overall_status,
257            claims,
258            truth_score: Some(cc.truth_score.clone()),
259            claimcheck_raw: Some(cc),
260        };
261
262        self.store_report_full(&report)?;
263        Ok(report)
264    }
265
266    /// Fallback: verify using session_actions in SQLite (no claimcheck binary needed).
267    pub fn verify_session(
268        &self,
269        session_id: &str,
270        claims: Vec<AgentClaim>,
271    ) -> Result<VerificationReport, VerifyError> {
272        let actions = self.load_actions(session_id)?;
273        let evidence = SessionEvidence { session_id: session_id.to_string(), actions };
274
275        let mut results = Vec::with_capacity(claims.len());
276        for claim in claims {
277            let verdict = self.verify_claim(&claim, &evidence);
278            let discrepancies = match &verdict {
279                ClaimVerdict::Fail => vec![format!(
280                    "no recorded {} action for resource '{}'",
281                    claim.action_type, claim.resource
282                )],
283                _ => vec![],
284            };
285            results.push(ClaimResult { claim, verdict, discrepancies });
286        }
287
288        let overall_status = derive_status(&results);
289        let report = VerificationReport {
290            session_id: session_id.to_string(),
291            overall_status,
292            claims: results,
293            truth_score: None,
294            claimcheck_raw: None,
295        };
296
297        self.store_report_full(&report)?;
298        Ok(report)
299    }
300
301    pub fn verify_claim(&self, claim: &AgentClaim, evidence: &SessionEvidence) -> ClaimVerdict {
302        if evidence.actions.is_empty() {
303            return ClaimVerdict::Inconclusive { reason: "no evidence".to_string() };
304        }
305        let matched = evidence.actions.iter().any(|a| {
306            a.action_type == claim.action_type && action_matches_resource(a, &claim.resource)
307        });
308        if matched { ClaimVerdict::Pass } else { ClaimVerdict::Fail }
309    }
310
311    fn load_actions(&self, session_id: &str) -> Result<Vec<SessionAction>, VerifyError> {
312        let mut stmt = self.db.prepare(
313            "SELECT step_number, timestamp, action_type, agent_pid, payload \
314             FROM session_actions WHERE session_id = ?1 ORDER BY step_number ASC",
315        )?;
316        let rows = stmt.query_map(params![session_id], |row| {
317            Ok(SessionAction {
318                step_number: row.get(0)?,
319                timestamp: row.get(1)?,
320                action_type: row.get(2)?,
321                agent_pid: row.get(3)?,
322                payload: row.get(4)?,
323            })
324        })?;
325        let mut actions = Vec::new();
326        for row in rows {
327            actions.push(row?);
328        }
329        Ok(actions)
330    }
331
332    fn store_report_full(&self, report: &VerificationReport) -> Result<(), VerifyError> {
333        let json = serde_json::to_string(report)?;
334        self.db.execute(
335            "INSERT INTO verification_reports (session_id, timestamp, overall_status, report_json) \
336             VALUES (?1, datetime('now'), ?2, ?3)",
337            params![report.session_id, report.overall_status.to_string(), json],
338        )?;
339        Ok(())
340    }
341}
342
343fn action_matches_resource(action: &SessionAction, resource: &str) -> bool {
344    if let Ok(v) = serde_json::from_str::<serde_json::Value>(&action.payload) {
345        for key in &["resource", "path", "url", "command"] {
346            if let Some(val) = v.get(key).and_then(|x| x.as_str()) {
347                if val == resource {
348                    return true;
349                }
350            }
351        }
352    }
353    false
354}
355
356fn derive_status(results: &[ClaimResult]) -> VerificationStatus {
357    if results.is_empty() {
358        return VerificationStatus::Inconclusive;
359    }
360    if results.iter().any(|r| r.verdict == ClaimVerdict::Fail) {
361        return VerificationStatus::Unverified;
362    }
363    if results.iter().all(|r| r.verdict == ClaimVerdict::Pass) {
364        VerificationStatus::Verified
365    } else {
366        VerificationStatus::Inconclusive
367    }
368}
369
#[cfg(test)]
mod tests {
    use super::*;
    use rusqlite::Connection;

    // ── fixtures ──────────────────────────────────────────────────────────────

    /// Tables the engine reads from and writes to.
    const SCHEMA: &str = "
        CREATE TABLE IF NOT EXISTS sessions (
            id TEXT PRIMARY KEY, started_at TEXT NOT NULL, ended_at TEXT,
            status TEXT NOT NULL DEFAULT 'Active'
        );
        CREATE TABLE IF NOT EXISTS session_actions (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            session_id TEXT NOT NULL REFERENCES sessions(id),
            step_number INTEGER NOT NULL,
            timestamp TEXT NOT NULL,
            action_type TEXT NOT NULL,
            agent_pid INTEGER NOT NULL,
            payload TEXT NOT NULL,
            UNIQUE(session_id, step_number)
        );
        CREATE TABLE IF NOT EXISTS verification_reports (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            session_id TEXT NOT NULL REFERENCES sessions(id),
            timestamp TEXT NOT NULL,
            overall_status TEXT NOT NULL,
            report_json TEXT NOT NULL
        );
    ";

    /// Timestamp shared by every hand-built claim and action below.
    const FIXED_TIME: &str = "2024-01-01T00:00:00Z";

    /// Fresh in-memory database with the schema applied.
    fn in_memory_db() -> Connection {
        let conn = Connection::open_in_memory().unwrap();
        conn.execute_batch(SCHEMA).unwrap();
        conn
    }

    /// Register a session row so actions and reports can reference it.
    fn insert_session(conn: &Connection, id: &str) {
        conn.execute(
            "INSERT INTO sessions (id, started_at) VALUES (?1, datetime('now'))",
            params![id],
        )
        .unwrap();
    }

    /// Record one action for `session_id` (agent pid is always 1).
    fn insert_action(conn: &Connection, session_id: &str, step: i64, action_type: &str, payload: &str) {
        conn.execute(
            "INSERT INTO session_actions \
             (session_id, step_number, timestamp, action_type, agent_pid, payload) \
             VALUES (?1, ?2, datetime('now'), ?3, 1, ?4)",
            params![session_id, step, action_type, payload],
        )
        .unwrap();
    }

    /// Claim of `action_type` on `resource`, made at the fixed timestamp.
    fn claim(action_type: &str, resource: &str) -> AgentClaim {
        AgentClaim {
            action_type: action_type.into(),
            resource: resource.into(),
            claimed_at: FIXED_TIME.into(),
        }
    }

    /// Evidence for session "s1" holding exactly one recorded action.
    fn single_action_evidence(action_type: &str, payload: &str) -> SessionEvidence {
        SessionEvidence {
            session_id: "s1".into(),
            actions: vec![SessionAction {
                step_number: 1,
                timestamp: FIXED_TIME.into(),
                action_type: action_type.into(),
                agent_pid: 42,
                payload: payload.into(),
            }],
        }
    }

    // ── claimcheck binary availability ────────────────────────────────────────

    #[test]
    fn claimcheck_available_check_does_not_panic() {
        let _ = claimcheck_available();
    }

    #[test]
    fn run_claimcheck_on_nonexistent_transcript_returns_error() {
        if !claimcheck_available() {
            return; // skip when not installed
        }
        let missing = Path::new("/tmp/nonexistent-hawk-transcript.jsonl");
        let outcome = run_claimcheck(missing, Path::new("."), None, false, None);
        assert!(outcome.is_err());
    }

    #[test]
    fn run_claimcheck_with_real_transcript_when_installed() {
        if !claimcheck_available() {
            return;
        }
        // Minimal Claude Code JSONL transcript: two messages, one per line.
        let transcript_body = concat!(
            r#"{"role":"user","content":"Create a file"}"#,
            "\n",
            r#"{"role":"assistant","content":"I created /tmp/hawk-test-claimcheck.txt with the content."}"#,
            "\n",
        );
        let dir = tempfile::TempDir::new().unwrap();
        let transcript = dir.path().join("session.jsonl");
        std::fs::write(&transcript, transcript_body).unwrap();

        // May succeed or fail depending on whether the file exists — only
        // check that the bridge runs and reports in one of the expected ways.
        match run_claimcheck(&transcript, dir.path(), None, false, None) {
            // truth_score is either "N/A" or "X%"
            Ok(report) => assert!(!report.truth_score.is_empty()),
            // claimcheck ran but produced unexpected output — acceptable
            Err(VerifyError::ClaimCheck(_)) => {}
            Err(e) => panic!("unexpected error: {e}"),
        }
    }

    // ── fallback engine (SQLite-based) ────────────────────────────────────────

    #[test]
    fn inconclusive_when_no_evidence() {
        let engine = VerificationEngine::new(in_memory_db());
        let no_actions = SessionEvidence { session_id: "s1".into(), actions: vec![] };
        let verdict = engine.verify_claim(&claim("file_write", "/tmp/out.txt"), &no_actions);
        assert_eq!(verdict, ClaimVerdict::Inconclusive { reason: "no evidence".into() });
    }

    #[test]
    fn pass_when_matching_action_exists() {
        let engine = VerificationEngine::new(in_memory_db());
        let evidence = single_action_evidence("file_write", r#"{"path":"/tmp/out.txt"}"#);
        let verdict = engine.verify_claim(&claim("file_write", "/tmp/out.txt"), &evidence);
        assert_eq!(verdict, ClaimVerdict::Pass);
    }

    #[test]
    fn fail_when_no_matching_action() {
        let engine = VerificationEngine::new(in_memory_db());
        let evidence = single_action_evidence("file_write", r#"{"path":"/tmp/out.txt"}"#);
        let verdict =
            engine.verify_claim(&claim("api_call", "https://api.openai.com/v1/chat"), &evidence);
        assert_eq!(verdict, ClaimVerdict::Fail);
    }

    #[test]
    fn verify_session_all_pass_returns_verified() {
        let conn = in_memory_db();
        insert_session(&conn, "sess-pass");
        insert_action(&conn, "sess-pass", 1, "file_write", r#"{"path":"/tmp/result.txt"}"#);
        let engine = VerificationEngine::new(conn);

        let report = engine
            .verify_session("sess-pass", vec![claim("file_write", "/tmp/result.txt")])
            .unwrap();

        assert_eq!(report.overall_status, VerificationStatus::Verified);
        assert_eq!(report.claims[0].verdict, ClaimVerdict::Pass);
        assert!(report.truth_score.is_none()); // fallback path
    }

    #[test]
    fn verify_session_any_fail_returns_unverified() {
        let conn = in_memory_db();
        insert_session(&conn, "sess-fail");
        insert_action(&conn, "sess-fail", 1, "file_write", r#"{"path":"/tmp/result.txt"}"#);
        let engine = VerificationEngine::new(conn);

        let claims = vec![
            claim("file_write", "/tmp/result.txt"),
            claim("api_call", "https://api.openai.com/v1/chat"),
        ];
        let report = engine.verify_session("sess-fail", claims).unwrap();

        assert_eq!(report.overall_status, VerificationStatus::Unverified);
    }

    #[test]
    fn verify_session_no_actions_returns_inconclusive() {
        let conn = in_memory_db();
        insert_session(&conn, "sess-inc");
        let engine = VerificationEngine::new(conn);

        let report = engine
            .verify_session("sess-inc", vec![claim("file_write", "/tmp/out.txt")])
            .unwrap();

        assert_eq!(report.overall_status, VerificationStatus::Inconclusive);
    }

    #[test]
    fn verify_session_stores_report_in_sqlite() {
        let conn = in_memory_db();
        insert_session(&conn, "sess-store");
        insert_action(&conn, "sess-store", 1, "file_write", r#"{"path":"/tmp/x.txt"}"#);
        let engine = VerificationEngine::new(conn);

        engine
            .verify_session("sess-store", vec![claim("file_write", "/tmp/x.txt")])
            .unwrap();

        let stored: i64 = engine
            .db
            .query_row(
                "SELECT COUNT(*) FROM verification_reports WHERE session_id = 'sess-store'",
                [],
                |row| row.get(0),
            )
            .unwrap();
        assert_eq!(stored, 1);
    }

    #[test]
    fn verify_session_empty_claims_returns_inconclusive() {
        let conn = in_memory_db();
        insert_session(&conn, "sess-empty");
        let engine = VerificationEngine::new(conn);

        let report = engine.verify_session("sess-empty", vec![]).unwrap();

        assert_eq!(report.overall_status, VerificationStatus::Inconclusive);
    }

    #[test]
    fn discrepancies_populated_on_fail() {
        let conn = in_memory_db();
        insert_session(&conn, "sess-disc");
        insert_action(&conn, "sess-disc", 1, "file_write", r#"{"path":"/tmp/other.txt"}"#);
        let engine = VerificationEngine::new(conn);

        let report = engine
            .verify_session("sess-disc", vec![claim("file_write", "/tmp/missing.txt")])
            .unwrap();

        assert_eq!(report.claims[0].verdict, ClaimVerdict::Fail);
        assert!(!report.claims[0].discrepancies.is_empty());
    }

    #[test]
    fn api_call_matched_via_url_field() {
        let engine = VerificationEngine::new(in_memory_db());
        let evidence =
            single_action_evidence("api_call", r#"{"url":"https://api.openai.com/v1/chat"}"#);
        let verdict =
            engine.verify_claim(&claim("api_call", "https://api.openai.com/v1/chat"), &evidence);
        assert_eq!(verdict, ClaimVerdict::Pass);
    }

    #[test]
    fn command_exec_matched_via_command_field() {
        let engine = VerificationEngine::new(in_memory_db());
        let evidence = single_action_evidence("command_exec", r#"{"command":"python3"}"#);
        let verdict = engine.verify_claim(&claim("command_exec", "python3"), &evidence);
        assert_eq!(verdict, ClaimVerdict::Pass);
    }
}
openhawk_verify/lib.rs

openhawk_verify/
lib.rs