// truth-mirror 0.2.0

//! CLAIM parsing and deterministic pre-commit gate.

use std::fmt;

use thiserror::Error;

/// Default fake-marker tokens searched for in lines a commit adds.
///
/// Used by [`GatePolicy::default`] and as the fallback when
/// [`first_fake_marker`] is given an empty marker list. Matching is an ASCII
/// case-insensitive substring test against each added line.
pub const DEFAULT_FAKE_MARKERS: &[&str] = &["mock-as-real", "TODO-as-done"];

/// Default evidence-pointer prefixes that make a CLAIM's evidence look real.
///
/// An evidence value that starts with one of these prefixes (compared ASCII
/// case-insensitively) is accepted as a pointer. Values containing `/` or `.`
/// are accepted regardless of this list.
pub const DEFAULT_EVIDENCE_PATTERNS: &[&str] = &[
    "file:",
    "path:",
    "log:",
    "test:",
    "tests:",
    "screenshot:",
    "artifact:",
    "ci:",
    "bead:",
    "openspec:",
    "commit:",
];

/// Diff paths excluded from the fake-marker scan: documentation and specs mention
/// markers to *describe* them, not to fake behavior. Matched by prefix or suffix.
///
/// Each entry is compared against the path taken from a `+++ ` diff header
/// (with a leading `b/` removed); an entry matches when the path starts with it
/// or ends with it.
pub const DEFAULT_MARKER_IGNORE_PATHS: &[&str] = &[".md", "openspec/", "docs/"];

/// Resolved deterministic-gate policy (markers, evidence patterns, ignore paths).
///
/// [`GatePolicy::default`] fills every field from the `DEFAULT_*` constants in
/// this module.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct GatePolicy {
    /// Marker tokens searched for in added diff lines. An empty list makes the
    /// scan fall back to [`DEFAULT_FAKE_MARKERS`].
    pub fake_markers: Vec<String>,
    /// Prefixes that make an evidence value count as a pointer when the CLAIM
    /// is parsed by [`evaluate_commit_message`].
    pub evidence_patterns: Vec<String>,
    /// Path prefixes/suffixes whose files are skipped by the fake-marker scan.
    pub marker_ignore_paths: Vec<String>,
}

impl Default for GatePolicy {
    /// Build the policy from the module's `DEFAULT_*` constant tables.
    fn default() -> Self {
        let fake_markers = owned(DEFAULT_FAKE_MARKERS);
        let evidence_patterns = owned(DEFAULT_EVIDENCE_PATTERNS);
        let marker_ignore_paths = owned(DEFAULT_MARKER_IGNORE_PATHS);

        Self {
            fake_markers,
            evidence_patterns,
            marker_ignore_paths,
        }
    }
}

/// Copy a slice of borrowed strings into a freshly allocated `Vec<String>`,
/// preserving order.
fn owned(values: &[&str]) -> Vec<String> {
    let mut copies = Vec::with_capacity(values.len());
    for &value in values {
        copies.push(String::from(value));
    }
    copies
}

/// A parsed `CLAIM:` line: what was done, how it was verified, and the
/// evidence pointers backing it.
///
/// Values built through [`Claim::new`] or the `parse*` constructors have a
/// non-empty `what`, a non-empty `verification` and at least one evidence
/// entry. The fields are public, so that invariant is not enforced after
/// construction.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct Claim {
    /// What the commit claims to have done (whitespace-normalized).
    pub what: String,
    /// How the claim was verified (whitespace-normalized).
    pub verification: String,
    /// Evidence pointers, in the order they were supplied.
    pub evidence: Vec<EvidenceRef>,
}

impl Claim {
    /// Build a claim from its parts.
    ///
    /// `what` and `verification` have their whitespace collapsed before the
    /// claim is validated.
    ///
    /// # Errors
    ///
    /// Returns [`ClaimError::EmptyWhat`], [`ClaimError::MissingVerification`]
    /// or [`ClaimError::MissingEvidence`] (checked in that order) when the
    /// corresponding part is empty.
    pub fn new(
        what: impl Into<String>,
        verification: impl Into<String>,
        evidence: Vec<EvidenceRef>,
    ) -> Result<Self, ClaimError> {
        let what = normalize_field(what.into());
        let verification = normalize_field(verification.into());
        let candidate = Self {
            what,
            verification,
            evidence,
        };

        candidate.validate().map(|()| candidate)
    }

    /// Parse the first `CLAIM:` line found in `input`, accepting the default
    /// evidence patterns.
    pub fn parse(input: &str) -> Result<Self, ClaimError> {
        Self::parse_with(input, DEFAULT_EVIDENCE_PATTERNS)
    }

    /// Parse a CLAIM, accepting the given evidence-pointer patterns (in addition
    /// to the built-in heuristics).
    ///
    /// Only the first line that starts with `CLAIM:` (after trimming) is used.
    ///
    /// # Errors
    ///
    /// Returns [`ClaimError::MissingClaim`] when no such line exists, otherwise
    /// whatever [`Claim::parse_line_with`] reports for that line.
    pub fn parse_with<S: AsRef<str>>(input: &str, patterns: &[S]) -> Result<Self, ClaimError> {
        for raw_line in input.lines() {
            let candidate = raw_line.trim();
            if candidate.starts_with("CLAIM:") {
                return Self::parse_line_with(candidate, patterns);
            }
        }

        Err(ClaimError::MissingClaim)
    }

    /// Parse a single `CLAIM:` line, accepting the default evidence patterns.
    pub fn parse_line(line: &str) -> Result<Self, ClaimError> {
        Self::parse_line_with(line, DEFAULT_EVIDENCE_PATTERNS)
    }

    /// Parse a single `CLAIM: <what> | verified: <how> | evidence: <a>, <b>` line.
    ///
    /// Segments are separated by `|`. The verification field may be labelled
    /// `verified`, `verification` or `how`; the evidence field `evidence` or
    /// `evidence-pointer` (labels are ASCII case-insensitive). Evidence items
    /// are comma-separated. Segments with any other label are ignored, and a
    /// repeated verification field overrides the earlier one.
    ///
    /// # Errors
    ///
    /// Returns [`ClaimError::MissingClaim`] when the first segment does not
    /// start with `CLAIM:`, the error of the first evidence item that fails to
    /// parse, [`ClaimError::MissingVerification`] when no verification field is
    /// present, or any error from [`Claim::new`].
    pub fn parse_line_with<S: AsRef<str>>(line: &str, patterns: &[S]) -> Result<Self, ClaimError> {
        let mut parts = line.split('|').map(str::trim);
        let Some(what) = parts.next().and_then(|head| head.strip_prefix("CLAIM:")) else {
            return Err(ClaimError::MissingClaim);
        };

        let mut verification: Option<String> = None;
        let mut evidence = Vec::new();

        for part in parts {
            if let Some(how) = field_value(part, &["verified", "verification", "how"]) {
                verification = Some(normalize_field(how.to_owned()));
            } else if let Some(pointers) = field_value(part, &["evidence", "evidence-pointer"]) {
                // Stop at the first pointer that does not parse.
                for pointer in pointers.split(',') {
                    evidence.push(EvidenceRef::parse_with(pointer, patterns)?);
                }
            }
        }

        match verification {
            Some(verification) => Self::new(what, verification, evidence),
            None => Err(ClaimError::MissingVerification),
        }
    }

    /// Render the claim back into its single-line `CLAIM:` form, with evidence
    /// pointers joined by `", "`.
    pub fn to_line(&self) -> String {
        let mut evidence = String::new();
        for (position, pointer) in self.evidence.iter().enumerate() {
            if position > 0 {
                evidence.push_str(", ");
            }
            evidence.push_str(pointer.as_str());
        }

        format!(
            "CLAIM: {} | verified: {} | evidence: {}",
            self.what, self.verification, evidence
        )
    }

    /// Check that every part of the claim is present, reporting the first
    /// missing one.
    fn validate(&self) -> Result<(), ClaimError> {
        if self.what.is_empty() {
            Err(ClaimError::EmptyWhat)
        } else if self.verification.is_empty() {
            Err(ClaimError::MissingVerification)
        } else if self.evidence.is_empty() {
            Err(ClaimError::MissingEvidence)
        } else {
            Ok(())
        }
    }
}

/// A validated evidence pointer.
///
/// The inner string is private; within this module values are created through
/// [`EvidenceRef::parse`] / [`EvidenceRef::parse_with`], which reject empty
/// values, placeholder words and text that does not look like a pointer.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct EvidenceRef(String);

impl EvidenceRef {
    /// Parse an evidence pointer, accepting the default evidence patterns.
    pub fn parse(value: &str) -> Result<Self, ClaimError> {
        Self::parse_with(value, DEFAULT_EVIDENCE_PATTERNS)
    }

    /// Parse an evidence pointer, accepting the given prefix patterns in
    /// addition to the built-in pointer heuristics.
    ///
    /// Whitespace in `value` is collapsed first.
    ///
    /// # Errors
    ///
    /// Returns [`ClaimError::MissingEvidence`] when nothing is left after
    /// normalization, and [`ClaimError::InvalidEvidence`] when the value is a
    /// placeholder word (compared ASCII case-insensitively) or does not look
    /// like a pointer.
    pub fn parse_with<S: AsRef<str>>(value: &str, patterns: &[S]) -> Result<Self, ClaimError> {
        // Words that stand in for "no evidence"; `n/a` is listed explicitly
        // because its slash would otherwise pass the pointer heuristic.
        const PLACEHOLDERS: [&str; 7] = ["none", "n/a", "na", "todo", "tbd", "later", "missing"];

        let pointer = normalize_field(value.to_owned());
        if pointer.is_empty() {
            return Err(ClaimError::MissingEvidence);
        }

        let is_placeholder = PLACEHOLDERS
            .iter()
            .any(|word| pointer.eq_ignore_ascii_case(word));

        if is_placeholder || !looks_like_pointer(&pointer, patterns) {
            Err(ClaimError::InvalidEvidence { value: pointer })
        } else {
            Ok(Self(pointer))
        }
    }

    /// Borrow the normalized pointer text.
    pub fn as_str(&self) -> &str {
        self.0.as_str()
    }
}

impl fmt::Display for EvidenceRef {
    /// Write the pointer text exactly as stored.
    ///
    /// Delegates to `str`'s `Display` implementation through a fully-qualified
    /// call, so it does not depend on the `Display` trait being imported for
    /// method syntax and still honours width, fill and alignment flags.
    fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
        fmt::Display::fmt(self.0.as_str(), formatter)
    }
}

/// Reasons a `CLAIM:` line (or its parts) is rejected.
#[derive(Clone, Debug, Eq, Error, PartialEq)]
pub enum ClaimError {
    /// No line (or first segment) starting with `CLAIM:` was found.
    #[error("missing CLAIM: line")]
    MissingClaim,
    /// The text after `CLAIM:` is empty once whitespace is normalized.
    #[error("CLAIM: what field is empty")]
    EmptyWhat,
    /// No verification field is present, or its value is empty.
    #[error("CLAIM: missing verified field")]
    MissingVerification,
    /// No evidence was supplied, or an evidence item is empty.
    #[error("CLAIM: missing evidence pointer")]
    MissingEvidence,
    /// An evidence item is a placeholder word or does not look like a pointer;
    /// `value` is the normalized item text.
    #[error("CLAIM: invalid evidence pointer {value:?}")]
    InvalidEvidence { value: String },
}

/// Reasons the deterministic pre-commit gate rejects a commit.
#[derive(Clone, Debug, Eq, Error, PartialEq)]
pub enum GateFailure {
    /// Neither the commit message nor a claim file provided a `CLAIM:` line.
    #[error("missing CLAIM: line")]
    MissingClaim,
    /// The commit message uses a completion word (`word`) while the CLAIM's
    /// evidence is missing or invalid.
    #[error("completion wording lacks evidence pointer for word {word:?}")]
    CompletionWithoutEvidence { word: String },
    /// The CLAIM failed to parse for any other reason.
    #[error("{0}")]
    InvalidClaim(#[from] ClaimError),
    /// A fake marker was found on an added diff line; `line` is the 1-based
    /// line number within the diff text.
    #[error("fake marker {marker:?} found at diff line {line}")]
    FakeMarker { marker: String, line: usize },
}

/// Run the deterministic gate over a commit.
///
/// The CLAIM is read from `commit_message` when any of its lines starts with
/// `CLAIM:` (after trimming); otherwise `claim_file` is used. The claim is
/// parsed with `policy.evidence_patterns`, and when `diff` is given its added
/// lines are scanned for `policy.fake_markers`, skipping files matched by
/// `policy.marker_ignore_paths`.
///
/// # Errors
///
/// - [`GateFailure::MissingClaim`] when the message has no CLAIM line and no
///   claim file was supplied.
/// - [`GateFailure::CompletionWithoutEvidence`] when the claim's evidence is
///   missing or invalid and the commit message contains a completion word.
/// - [`GateFailure::InvalidClaim`] for every other parse failure.
/// - [`GateFailure::FakeMarker`] when the diff adds a fake marker.
pub fn evaluate_commit_message(
    commit_message: &str,
    claim_file: Option<&str>,
    diff: Option<&str>,
    policy: &GatePolicy,
) -> Result<Claim, GateFailure> {
    let message_has_claim = commit_message
        .lines()
        .map(str::trim)
        .any(|line| line.starts_with("CLAIM:"));

    let claim_source = match (message_has_claim, claim_file) {
        (true, _) => commit_message,
        (false, Some(file)) => file,
        (false, None) => return Err(GateFailure::MissingClaim),
    };

    let claim = match Claim::parse_with(claim_source, &policy.evidence_patterns) {
        Ok(claim) => claim,
        Err(error) => {
            let evidence_problem = matches!(
                error,
                ClaimError::MissingEvidence | ClaimError::InvalidEvidence { .. }
            );
            // Completion wording is looked up in the commit message itself,
            // even when the CLAIM came from the claim file.
            return Err(match completion_word(commit_message) {
                Some(word) if evidence_problem => GateFailure::CompletionWithoutEvidence {
                    word: word.to_owned(),
                },
                _ => GateFailure::InvalidClaim(error),
            });
        }
    };

    let fake_marker = diff.and_then(|patch| {
        first_fake_marker(patch, &policy.fake_markers, &policy.marker_ignore_paths)
    });

    match fake_marker {
        Some(failure) => Err(failure),
        None => Ok(claim),
    }
}

/// Whether a new-side diff path is excluded from the fake-marker scan.
///
/// A path is ignored when it starts with or ends with any non-empty entry of
/// `ignore_paths`; empty entries never match.
fn path_is_ignored<S: AsRef<str>>(path: &str, ignore_paths: &[S]) -> bool {
    for entry in ignore_paths {
        let pattern: &str = entry.as_ref();
        if pattern.is_empty() {
            continue;
        }
        if path.starts_with(pattern) || path.ends_with(pattern) {
            return true;
        }
    }
    false
}

/// Find the first fake marker that a diff *adds*.
///
/// `fake_markers` are matched as ASCII case-insensitive substrings; an empty
/// list falls back to [`DEFAULT_FAKE_MARKERS`]. Files whose `+++ ` header path
/// matches `ignore_paths` (by prefix or suffix) are skipped.
///
/// Only lines the commit actually INTRODUCES count. Context and removed lines
/// are not this commit's doing — flagging them would trip on any change made
/// near an unrelated pre-existing marker (including a source file that
/// legitimately *defines* the marker token).
///
/// When the diff carries `@@ -a,b +c,d @@` hunk headers, their line counts are
/// used to tell hunk content from file headers. An added line whose own text
/// starts with `++` is then still scanned instead of being mistaken for a
/// `+++` header. Diffs without hunk headers are handled heuristically: `+++ `
/// lines are file headers and other `+++…` lines are skipped.
///
/// Returns [`GateFailure::FakeMarker`] carrying the marker as configured
/// (trimmed) and the 1-based line number within `diff`, or `None` when no
/// added line contains a marker.
pub fn first_fake_marker<S: AsRef<str>>(
    diff: &str,
    fake_markers: &[String],
    ignore_paths: &[S],
) -> Option<GateFailure> {
    let markers = normalized_markers(fake_markers);
    let failure_at = |added: &str, index: usize| {
        let lowered = added.to_ascii_lowercase();
        markers
            .iter()
            .find(|marker| lowered.contains(marker.normalized.as_str()))
            .map(|marker| GateFailure::FakeMarker {
                marker: marker.original.clone(),
                line: index + 1,
            })
    };

    let mut ignored_file = false;
    // Body lines still owed to the current hunk, taken from its `@@` header.
    // Both stay at zero for diffs that carry no hunk headers.
    let mut old_left = 0_usize;
    let mut new_left = 0_usize;

    for (index, line) in diff.lines().enumerate() {
        // Hunk content always starts with ' ', '+', '-' or '\', so a parsable
        // `@@ -… +… @@` line is unambiguously a hunk header.
        if let Some((old_len, new_len)) = hunk_line_counts(line) {
            old_left = old_len;
            new_left = new_len;
            continue;
        }

        if old_left > 0 || new_left > 0 {
            if let Some(added) = line.strip_prefix('+') {
                new_left = new_left.saturating_sub(1);
                let hit = if ignored_file {
                    None
                } else {
                    failure_at(added, index)
                };
                if hit.is_some() {
                    return hit;
                }
                continue;
            }
            if line.starts_with('-') {
                old_left = old_left.saturating_sub(1);
                continue;
            }
            // Some tools strip the single space of an empty context line.
            if line.starts_with(' ') || line.is_empty() {
                old_left = old_left.saturating_sub(1);
                new_left = new_left.saturating_sub(1);
                continue;
            }
            // "\ No newline at end of file" belongs to the hunk but is not
            // counted by the header.
            if line.starts_with('\\') {
                continue;
            }
            // Anything else cannot be hunk content: the header counts were
            // wrong or the hunk was truncated. Resynchronise and treat the
            // line as being outside any hunk.
            old_left = 0;
            new_left = 0;
        }

        // Track the current file from its `+++ b/<path>` header so documentation
        // and spec files (which mention markers to describe them) are skipped.
        if let Some(rest) = line.strip_prefix("+++ ") {
            ignored_file = path_is_ignored(header_path(rest), ignore_paths);
            continue;
        }

        let Some(added) = line.strip_prefix('+') else {
            continue;
        };
        // Without hunk information a `+++…` line is assumed to be a header.
        if added.starts_with("++") || ignored_file {
            continue;
        }

        let hit = failure_at(added, index);
        if hit.is_some() {
            return hit;
        }
    }

    None
}

/// Parse the old/new line counts out of a `@@ -start[,len] +start[,len] @@` hunk header.
fn hunk_line_counts(line: &str) -> Option<(usize, usize)> {
    let rest = line.strip_prefix("@@ -")?;
    let (ranges, _section) = rest.split_once(" @@")?;
    let (old_range, new_range) = ranges.split_once(" +")?;
    Some((range_len(old_range)?, range_len(new_range)?))
}

/// Length of one `start[,len]` hunk range; an omitted length means one line.
fn range_len(range: &str) -> Option<usize> {
    match range.split_once(',') {
        Some((_start, len)) => len.parse().ok(),
        None => range.parse::<usize>().ok().map(|_start| 1),
    }
}

/// Extract the new-side path from the text that follows `+++ ` in a file header.
fn header_path(rest: &str) -> &str {
    // Git appends a tab after names containing whitespace, and `diff -u`
    // appends a tab plus timestamp; neither is part of the path.
    let name = rest.split('\t').next().unwrap_or(rest);
    // Git wraps names containing special characters in double quotes.
    let name = name
        .strip_prefix('"')
        .and_then(|inner| inner.strip_suffix('"'))
        .unwrap_or(name);
    name.strip_prefix("b/").unwrap_or(name)
}

/// A fake marker prepared for scanning.
struct Marker {
    /// The marker as configured, trimmed; reported back in `GateFailure::FakeMarker`.
    original: String,
    /// Trimmed, ASCII-lowercased form compared against lowercased diff lines.
    normalized: String,
}

/// Prepare the marker list for scanning.
///
/// An empty `fake_markers` slice selects [`DEFAULT_FAKE_MARKERS`]. Entries that
/// are blank after trimming are dropped; the rest are kept in order, both in
/// trimmed form and in trimmed ASCII-lowercase form.
fn normalized_markers(fake_markers: &[String]) -> Vec<Marker> {
    let candidates: Vec<&str> = if fake_markers.is_empty() {
        DEFAULT_FAKE_MARKERS.to_vec()
    } else {
        fake_markers.iter().map(String::as_str).collect()
    };

    let mut prepared = Vec::with_capacity(candidates.len());
    for candidate in candidates {
        let trimmed = candidate.trim();
        if trimmed.is_empty() {
            continue;
        }
        prepared.push(Marker {
            original: trimmed.to_owned(),
            normalized: trimmed.to_ascii_lowercase(),
        });
    }
    prepared
}

/// Return the first completion word found in `input`, if any.
///
/// The input is split on every character that is not ASCII alphanumeric, and
/// each token is compared ASCII case-insensitively against a fixed word list.
/// The returned value is the lowercase word from that list.
fn completion_word(input: &str) -> Option<&'static str> {
    const WORDS: &[&str] = &[
        "done",
        "complete",
        "completed",
        "verified",
        "fixed",
        "passing",
    ];

    for token in input.split(|character: char| !character.is_ascii_alphanumeric()) {
        for &candidate in WORDS {
            // Every entry of WORDS is lowercase ASCII, so an ASCII
            // case-insensitive comparison equals lowercasing the token first.
            if token.eq_ignore_ascii_case(candidate) {
                return Some(candidate);
            }
        }
    }

    None
}

/// Split a `label: value` segment at its first colon and return the trimmed
/// value when the trimmed label equals one of `names` (ASCII
/// case-insensitively). Returns `None` when there is no colon or the label is
/// not recognised.
fn field_value<'a>(segment: &'a str, names: &[&str]) -> Option<&'a str> {
    match segment.split_once(':') {
        Some((label, value))
            if names
                .iter()
                .any(|known| known.eq_ignore_ascii_case(label.trim())) =>
        {
            Some(value.trim())
        }
        _ => None,
    }
}

/// Heuristic: does `value` look like a pointer to real evidence?
///
/// A value qualifies when it contains `/` (which also covers URLs with `://`)
/// or `.`, or when it starts with one of `patterns`, compared ASCII
/// case-insensitively.
///
/// Patterns are trimmed, and patterns that are blank after trimming are
/// skipped. Every string starts with the empty string, so an empty pattern
/// would otherwise accept any value at all.
fn looks_like_pointer<S: AsRef<str>>(value: &str, patterns: &[S]) -> bool {
    if value.contains('/') || value.contains('.') {
        return true;
    }

    let bytes = value.as_bytes();
    patterns.iter().any(|pattern| {
        let pattern: &str = pattern.as_ref();
        let prefix = pattern.trim().as_bytes();
        // Byte-wise comparison avoids allocating a lowercase copy per pattern;
        // `get` returns `None` when the value is shorter than the prefix.
        !prefix.is_empty()
            && bytes
                .get(..prefix.len())
                .is_some_and(|head| head.eq_ignore_ascii_case(prefix))
    })
}

/// Collapse every run of whitespace in `value` into a single space and drop
/// leading and trailing whitespace.
fn normalize_field(value: String) -> String {
    let mut collapsed = String::with_capacity(value.len());
    for word in value.split_whitespace() {
        // Words are never empty, so an empty buffer means this is the first one.
        if !collapsed.is_empty() {
            collapsed.push(' ');
        }
        collapsed.push_str(word);
    }
    collapsed
}

/// Unit and property tests for CLAIM parsing and the deterministic gate.
#[cfg(test)]
mod tests {
    use proptest::prelude::*;

    use super::{
        Claim, ClaimError, EvidenceRef, GateFailure, GatePolicy, evaluate_commit_message,
        first_fake_marker,
    };

    /// No ignore paths — every added line is scanned.
    const NO_IGNORE: &[&str] = &[];

    /// A CLAIM line embedded in a commit message is found and split into its
    /// three fields.
    #[test]
    fn parses_claim_line_with_evidence() {
        let claim = Claim::parse(
            "feat: thing\n\nCLAIM: add parser | verified: cargo test | evidence: tests:cargo-test",
        )
        .unwrap();

        assert_eq!(claim.what, "add parser");
        assert_eq!(claim.verification, "cargo test");
        assert_eq!(claim.evidence[0].as_str(), "tests:cargo-test");
    }

    /// Input without any `CLAIM:` line is rejected.
    #[test]
    fn rejects_missing_claim() {
        let error = Claim::parse("feat: thing").unwrap_err();

        assert_eq!(error, ClaimError::MissingClaim);
    }

    /// A CLAIM with a verification field but no evidence field is rejected.
    #[test]
    fn rejects_missing_evidence() {
        let error = Claim::parse("CLAIM: complete parser | verified: cargo test").unwrap_err();

        assert_eq!(error, ClaimError::MissingEvidence);
    }

    /// Through the gate, missing evidence combined with completion wording in
    /// the commit message is reported with the offending word.
    #[test]
    fn reports_completion_word_without_evidence() {
        let error = evaluate_commit_message(
            "feat: parser\n\nCLAIM: complete parser | verified: cargo test",
            None,
            None,
            &GatePolicy::default(),
        )
        .unwrap_err();

        assert_eq!(
            error,
            GateFailure::CompletionWithoutEvidence {
                word: "complete".to_owned()
            }
        );
    }

    /// When the commit message has no CLAIM line, the claim file is used.
    #[test]
    fn accepts_claim_file_fallback() {
        let claim = evaluate_commit_message(
            "feat: parser",
            Some("CLAIM: add parser | verified: cargo test | evidence: tests:cargo-test"),
            None,
            &GatePolicy::default(),
        )
        .unwrap();

        assert_eq!(claim.what, "add parser");
    }

    /// A prefix configured in the policy makes an otherwise unrecognised
    /// evidence value acceptable.
    #[test]
    fn custom_evidence_pattern_is_accepted() {
        let policy = GatePolicy {
            evidence_patterns: vec!["jira:".to_owned()],
            ..GatePolicy::default()
        };
        let claim = evaluate_commit_message(
            "chore: thing\n\nCLAIM: do thing | verified: manual | evidence: jira:PROJ-42",
            None,
            None,
            &policy,
        )
        .unwrap();

        assert_eq!(claim.evidence[0].as_str(), "jira:PROJ-42");
    }

    /// The default ignore paths exempt documentation files from the marker scan.
    #[test]
    fn marker_in_ignored_doc_path_is_not_flagged() {
        // Same added marker line, but under a doc path in the diff → skipped.
        let marker = ["mock", "as", "real"].join("-");
        let diff = format!("diff --git a/docs/x.md b/docs/x.md\n+++ b/docs/x.md\n+ {marker}");

        let policy = GatePolicy::default();
        assert!(
            first_fake_marker(&diff, &policy.fake_markers, &policy.marker_ignore_paths).is_none()
        );

        // Under a code path, the same line IS flagged.
        let code_diff = format!("diff --git a/src/x.rs b/src/x.rs\n+++ b/src/x.rs\n+ {marker}");
        assert!(
            first_fake_marker(
                &code_diff,
                &policy.fake_markers,
                &policy.marker_ignore_paths
            )
            .is_some()
        );
    }

    /// An empty marker list falls back to the defaults, and the reported line
    /// number is 1-based within the diff text.
    #[test]
    fn finds_default_fake_marker_with_location() {
        let done = ["TODO", "as", "done"].join("-");
        let diff = format!("diff --git a/x b/x\n+ {done}");
        let error = first_fake_marker(&diff, &[], NO_IGNORE).unwrap();

        assert_eq!(
            error,
            GateFailure::FakeMarker {
                marker: "TODO-as-done".to_owned(),
                line: 2
            }
        );
    }

    /// Only added lines are scanned for markers.
    #[test]
    fn context_and_removed_lines_do_not_trip_fake_marker() {
        // Built at runtime so the literal marker token never appears in this file
        // (truth-mirror's own gate would otherwise flag this very line).
        let marker = ["mock", "as", "real"].join("-");
        // A context line (space prefix) and a removed line (-) that merely mention
        // the marker must not be flagged; only added (+) content counts.
        let diff = format!(
            "diff --git a/x b/x\n const MARKERS = [\"{marker}\"];\n- old_line_with {marker}\n+ let honest = compute();"
        );

        assert!(first_fake_marker(&diff, &[], NO_IGNORE).is_none());
    }

    /// A marker anywhere inside an added line is flagged.
    #[test]
    fn added_line_with_marker_is_flagged() {
        let marker = ["mock", "as", "real"].join("-");
        let diff = format!("diff --git a/x b/x\n+ {marker} here");
        let error = first_fake_marker(&diff, &[], NO_IGNORE).unwrap();

        assert!(matches!(error, GateFailure::FakeMarker { .. }));
    }

    /// A non-empty marker list is used instead of the defaults.
    #[test]
    fn configured_fake_marker_overrides_defaults() {
        let markers = vec!["pretend-pass".to_owned()];
        let error = first_fake_marker("+ pretend-pass", &markers, NO_IGNORE).unwrap();

        assert_eq!(
            error,
            GateFailure::FakeMarker {
                marker: "pretend-pass".to_owned(),
                line: 1
            }
        );
    }

    proptest! {
        // Rendering a claim with `to_line` and parsing it back yields an equal
        // claim. The generated fields never contain `|` or `,`, which act as
        // separators in the line format.
        #[test]
        fn claim_roundtrip_preserves_semantic_fields(
            what in "[A-Za-z0-9][A-Za-z0-9 _./:-]{0,48}",
            verification in "[A-Za-z0-9][A-Za-z0-9 _./:-]{0,48}",
            evidence_suffix in "[a-z0-9][a-z0-9_-]{0,24}",
        ) {
            let evidence = EvidenceRef::parse(&format!("tests:{evidence_suffix}")).unwrap();
            let claim = Claim::new(what, verification, vec![evidence]).unwrap();

            let parsed = Claim::parse(&claim.to_line()).unwrap();

            prop_assert_eq!(parsed, claim);
        }
    }
}