Skip to main content

destructive_command_guard/
confidence.rs

1//! Confidence scoring for pattern matches.
2//!
3//! This module provides a lightweight confidence model that helps reduce false positives
4//! by scoring how confident we are that a pattern match is truly destructive.
5//!
6//! # Design Principles
7//!
8//! 1. **Conservative by default**: When in doubt, treat as high confidence (block)
9//! 2. **Explainable**: Every confidence adjustment is tracked with a signal
10//! 3. **Fast**: Confidence scoring adds minimal overhead to evaluation
11//!
12//! # Confidence Signals
13//!
14//! The confidence score is computed from multiple signals:
15//! - **Match location**: Executed context vs data context (string literals, comments)
16//! - **Wrapper context**: Known-safe wrappers like `git commit -m`, `rg`, `echo`
17//! - **Execution operators**: Presence of `|`, `;`, `&&`, `$(...)` near match
18//! - **Sanitization**: Whether the match was in content masked by sanitization
19//!
20//! # Example
21//!
22//! ```ignore
23//! use destructive_command_guard::confidence::{compute_match_confidence, ConfidenceContext};
24//!
25//! let ctx = ConfidenceContext {
26//!     command: "git commit -m 'Fix rm -rf detection'",
27//!     sanitized_command: Some("git commit -m ''"),  // 'rm -rf' was masked
28//!     match_start: 17,
29//!     match_end: 31,
30//! };
31//! let score = compute_match_confidence(&ctx);
32//! // score.value < 0.5 because match is in a sanitized (data) region
33//! ```
34
35use crate::context::{CommandSpans, SpanKind, classify_command};
36use smallvec::SmallVec;
37
/// One piece of evidence that fed into a confidence score.
///
/// Every variant has a fixed multiplier ([`ConfidenceSignal::weight`]) and a fixed
/// explanation string ([`ConfidenceSignal::description`]).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ConfidenceSignal {
    /// The match lies in an executed span (high confidence).
    ExecutedSpan,
    /// The match lies in an inline code span such as `bash -c` (high confidence).
    InlineCodeSpan,
    /// The match lies in a data span such as a single-quoted string (low confidence).
    DataSpan,
    /// The match lies in an argument span such as `git commit -m` (low confidence).
    ArgumentSpan,
    /// The match lies in a comment (very low confidence).
    CommentSpan,
    /// The match lies in a heredoc body (needs deeper analysis).
    HeredocBodySpan,
    /// The match lies in an ambiguous/unknown span (moderate confidence).
    UnknownSpan,
    /// The matched bytes were masked by sanitization (low confidence).
    SanitizedRegion,
    /// Execution operators were found close to the match (boosts confidence).
    ExecutionOperatorsNearby,
    /// The match sits at command position, i.e. first word of a segment (high confidence).
    CommandPosition,
    /// The match sits in argument position (lower confidence).
    ArgumentPosition,
}

impl ConfidenceSignal {
    /// Multiplier applied to the running confidence when this signal is recorded.
    ///
    /// Values below 1.0 reduce confidence, 1.0 leaves it unchanged, and values
    /// above 1.0 boost it.
    #[must_use]
    pub const fn weight(self) -> f32 {
        match self {
            // Strong reductions: the match looks like data rather than code.
            Self::CommentSpan => 0.05,
            Self::DataSpan => 0.1,
            Self::SanitizedRegion => 0.2,
            Self::ArgumentSpan => 0.3,
            Self::ArgumentPosition => 0.6,
            // Mild reductions: context is ambiguous, so stay fairly conservative.
            Self::HeredocBodySpan => 0.7,
            Self::UnknownSpan => 0.8,
            // Neutral: the match is in code that runs.
            Self::ExecutedSpan | Self::InlineCodeSpan => 1.0,
            // Slight boost.
            Self::CommandPosition | Self::ExecutionOperatorsNearby => 1.1,
        }
    }

    /// Short human-readable explanation of this signal, for debug/explain output.
    #[must_use]
    pub const fn description(self) -> &'static str {
        match self {
            // Span classification
            Self::ExecutedSpan => "match is in executed code",
            Self::InlineCodeSpan => "match is in inline code (bash -c, python -c, etc.)",
            Self::DataSpan => "match is in a data string (single-quoted)",
            Self::ArgumentSpan => "match is in a string argument to a safe command",
            Self::CommentSpan => "match is in a comment",
            Self::HeredocBodySpan => "match is in a heredoc body",
            Self::UnknownSpan => "match context is ambiguous",
            // Sanitization
            Self::SanitizedRegion => "match was in a region masked by sanitization",
            // Surrounding syntax
            Self::ExecutionOperatorsNearby => "execution operators (|, ;, &&) found nearby",
            Self::CommandPosition => "match is at command position",
            Self::ArgumentPosition => "match is in argument position",
        }
    }
}
106
/// A confidence score with the signals that contributed to it.
///
/// Both fields are public, so the 0.0 - 1.0 range of `value` is a convention
/// rather than an enforced invariant; `add_signal` clamps to that range on
/// every update.
#[derive(Debug, Clone)]
pub struct ConfidenceScore {
    /// The final confidence value (intended range 0.0 - 1.0).
    /// Higher values mean more confident the match is truly destructive.
    pub value: f32,
    /// Signals that contributed to this score, in the order they were added
    /// (for debugging/explain). The vector has inline capacity for four signals.
    pub signals: SmallVec<[ConfidenceSignal; 4]>,
}
116
117impl Default for ConfidenceScore {
118    fn default() -> Self {
119        Self::high()
120    }
121}
122
123impl ConfidenceScore {
124    /// Create a high confidence score (default for matches).
125    #[must_use]
126    pub fn high() -> Self {
127        Self {
128            value: 1.0,
129            signals: SmallVec::new(),
130        }
131    }
132
133    /// Create a low confidence score.
134    #[must_use]
135    pub fn low(signal: ConfidenceSignal) -> Self {
136        let mut signals = SmallVec::new();
137        signals.push(signal);
138        Self {
139            value: signal.weight(),
140            signals,
141        }
142    }
143
144    /// Add a signal and adjust the score.
145    pub fn add_signal(&mut self, signal: ConfidenceSignal) {
146        self.signals.push(signal);
147        // Use multiplicative adjustment (clamped to 0.0 - 1.0)
148        self.value = (self.value * signal.weight()).clamp(0.0, 1.0);
149    }
150
151    /// Check if confidence is below a threshold.
152    #[must_use]
153    pub fn is_low(&self, threshold: f32) -> bool {
154        self.value < threshold
155    }
156
157    /// Check if confidence warrants downgrading from Deny to Warn.
158    ///
159    /// Returns true if confidence is below the warn threshold (default 0.5).
160    #[must_use]
161    pub fn should_warn(&self) -> bool {
162        self.is_low(DEFAULT_WARN_THRESHOLD)
163    }
164}
165
/// Default threshold below which we downgrade Deny to Warn.
///
/// `ConfidenceScore::should_warn` compares the score against this value with a
/// strict `<`, so a score of exactly 0.5 is not downgraded.
pub const DEFAULT_WARN_THRESHOLD: f32 = 0.5;
168
/// Context for computing match confidence.
///
/// The struct only borrows its strings and is cheap to copy. `match_start`
/// and `match_end` are byte offsets into `command`.
#[derive(Debug, Clone, Copy)]
pub struct ConfidenceContext<'a> {
    /// The original command string.
    pub command: &'a str,
    /// The sanitized command (with safe data regions masked), if one is available.
    ///
    /// `compute_match_confidence` compares the same byte range in both strings,
    /// so the sanitized-region signal is only meaningful when offsets line up
    /// between the two (NOTE(review): presumably the sanitizer preserves length —
    /// confirm against the sanitizer implementation).
    pub sanitized_command: Option<&'a str>,
    /// Start byte offset of the match in the original command.
    pub match_start: usize,
    /// End byte offset (exclusive) of the match in the original command.
    pub match_end: usize,
}
180
181/// Compute confidence for a pattern match.
182///
183/// This analyzes the match context to determine how confident we are
184/// that the match represents actual destructive intent vs. a false positive.
185#[must_use]
186pub fn compute_match_confidence(ctx: &ConfidenceContext<'_>) -> ConfidenceScore {
187    let mut score = ConfidenceScore::high();
188
189    // Signal 1: Check if match is in a sanitized region
190    if let Some(sanitized) = ctx.sanitized_command {
191        if ctx.match_start < sanitized.len()
192            && ctx.match_end <= sanitized.len()
193            && sanitized != ctx.command
194        {
195            // Check if the matched region is different in sanitized vs original
196            let original_slice = ctx.command.get(ctx.match_start..ctx.match_end);
197            let sanitized_slice = sanitized.get(ctx.match_start..ctx.match_end);
198
199            if original_slice != sanitized_slice {
200                // Match was in a sanitized region - low confidence
201                score.add_signal(ConfidenceSignal::SanitizedRegion);
202            }
203        }
204    }
205
206    // Signal 2: Classify span at match location
207    let spans = classify_command(ctx.command);
208    let signal = classify_match_span(&spans, ctx.match_start, ctx.match_end);
209    score.add_signal(signal);
210
211    // Signal 3: Check for execution operators nearby
212    if has_execution_operators_nearby(ctx.command, ctx.match_start, ctx.match_end) {
213        score.add_signal(ConfidenceSignal::ExecutionOperatorsNearby);
214    }
215
216    // Signal 4: Check if match is at command position vs argument position
217    if is_command_position(ctx.command, ctx.match_start) {
218        score.add_signal(ConfidenceSignal::CommandPosition);
219    } else {
220        score.add_signal(ConfidenceSignal::ArgumentPosition);
221    }
222
223    score
224}
225
226/// Classify the span type at a given byte range.
227fn classify_match_span(
228    spans: &CommandSpans,
229    match_start: usize,
230    match_end: usize,
231) -> ConfidenceSignal {
232    // Find the span that contains the match start
233    for span in spans.spans() {
234        if span.byte_range.start <= match_start && match_end <= span.byte_range.end {
235            return match span.kind {
236                SpanKind::Executed => ConfidenceSignal::ExecutedSpan,
237                SpanKind::InlineCode => ConfidenceSignal::InlineCodeSpan,
238                SpanKind::Data => ConfidenceSignal::DataSpan,
239                SpanKind::Argument => ConfidenceSignal::ArgumentSpan,
240                SpanKind::Comment => ConfidenceSignal::CommentSpan,
241                SpanKind::HeredocBody => ConfidenceSignal::HeredocBodySpan,
242                SpanKind::Unknown => ConfidenceSignal::UnknownSpan,
243            };
244        }
245    }
246
247    // Match spans multiple regions or is outside classified spans
248    // Conservative: treat as unknown (moderate confidence)
249    ConfidenceSignal::UnknownSpan
250}
251
/// Check if there are execution operators near the match.
///
/// Execution operators (`|`, `;`, `&&`, `||`, `$(`, `` ` ``) suggest the command
/// will be executed. The search covers up to 20 bytes before `match_start` and
/// up to 20 bytes after `match_end`; the matched text itself is not searched.
///
/// The search runs on raw bytes. Every operator is pure ASCII, and ASCII bytes
/// never occur inside a multi-byte UTF-8 sequence, so a window edge that falls
/// in the middle of a character can neither panic nor hide an operator.
/// Offsets beyond the end of `command` simply yield an empty window.
fn has_execution_operators_nearby(command: &str, match_start: usize, match_end: usize) -> bool {
    /// How many bytes on each side of the match are inspected.
    const WINDOW: usize = 20;
    const OPERATORS: [&[u8]; 6] = [b"|", b";", b"&&", b"||", b"$(", b"`"];

    let bytes = command.as_bytes();

    let before_start = match_start.saturating_sub(WINDOW);
    let before = bytes.get(before_start..match_start).unwrap_or(&[]);

    let after_end = match_end.saturating_add(WINDOW).min(bytes.len());
    let after = bytes.get(match_end..after_end).unwrap_or(&[]);

    // Operators are never empty, so `windows` is always given a non-zero size.
    let contains = |haystack: &[u8], needle: &[u8]| {
        haystack
            .windows(needle.len())
            .any(|candidate| candidate == needle)
    };

    OPERATORS
        .iter()
        .any(|op| contains(before, op) || contains(after, op))
}
276
/// Check if the match is at command position (first word of a segment).
///
/// A match is at command position when nothing but whitespace precedes it, or
/// when the last non-whitespace text before it is one of `|`, `;`, `(`, `` ` ``
/// or `&&` (which also covers `||` and `$(`).
///
/// If `match_start` is past the end of `command` or falls inside a multi-byte
/// character, the function does not panic; it returns `true`, the conservative
/// answer, because command position never lowers confidence.
///
/// NOTE(review): newlines are trimmed as ordinary whitespace and a lone `&`
/// is not treated as a separator, so a match that starts a new line or follows
/// a background operator is reported as argument position.
fn is_command_position(command: &str, match_start: usize) -> bool {
    // `get` rather than `&command[..match_start]`: slicing panics on an
    // out-of-range offset or one that is not on a char boundary.
    let Some(prefix) = command.get(..match_start) else {
        return true;
    };

    // Also covers `match_start == 0`, where the prefix is empty.
    let preceding = prefix.trim_end();
    if preceding.is_empty() {
        return true;
    }

    // `||` ends in `|` and `$(` ends in `(`, so only `&&` needs a string check.
    matches!(preceding.chars().next_back(), Some('|' | ';' | '(' | '`'))
        || preceding.ends_with("&&")
}
299
300/// Compute confidence for a match, returning both the score and whether to downgrade.
301///
302/// This is a convenience function that combines confidence computation with
303/// the downgrade decision.
304#[must_use]
305pub fn should_downgrade_to_warn(ctx: &ConfidenceContext<'_>) -> (ConfidenceScore, bool) {
306    let score = compute_match_confidence(ctx);
307    let downgrade = score.should_warn();
308    (score, downgrade)
309}
310
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_high_confidence_executed_command() {
        let ctx = ConfidenceContext {
            command: "rm -rf /",
            sanitized_command: None,
            match_start: 0,
            match_end: 8,
        };
        let score = compute_match_confidence(&ctx);
        assert!(
            score.value > 0.5,
            "Direct command should have high confidence"
        );
    }

    #[test]
    fn test_low_confidence_in_commit_message() {
        // The sanitized string here is shorter than the original (16 bytes), so
        // the match offsets lie beyond it and the sanitized-region comparison is
        // skipped. The low score therefore has to come from the remaining
        // signals (span classification and argument position).
        let ctx = ConfidenceContext {
            command: "git commit -m 'Fix rm -rf detection'",
            sanitized_command: Some("git commit -m ''"),
            match_start: 18,
            match_end: 31,
        };
        let score = compute_match_confidence(&ctx);
        assert!(
            score.value < 0.5,
            "Match in sanitized commit message should have low confidence: {}",
            score.value
        );
    }

    #[test]
    fn test_sanitized_region_signal_with_aligned_offsets() {
        // Hypothetical length-preserving mask: the 20-byte message body is
        // blanked with spaces so byte offsets line up with the original.
        let command = "git commit -m 'Fix rm -rf detection'";
        let masked = format!("git commit -m '{}'", " ".repeat(20));
        assert_eq!(masked.len(), command.len());

        let ctx = ConfidenceContext {
            command,
            sanitized_command: Some(masked.as_str()),
            match_start: 19, // Start of "rm -rf"
            match_end: 25,   // End of "rm -rf"
        };
        let score = compute_match_confidence(&ctx);
        assert!(
            score.signals.contains(&ConfidenceSignal::SanitizedRegion),
            "Masked bytes under the match should raise the sanitized-region signal"
        );
        // 0.2 (sanitized) * 0.6 (argument position) bounds the score well below
        // the threshold regardless of how the span is classified.
        assert!(
            score.value < 0.5,
            "Match in a masked region should have low confidence: {}",
            score.value
        );
    }

    #[test]
    fn test_confidence_with_pipe_operator() {
        let ctx = ConfidenceContext {
            command: "echo foo | rm -rf /",
            sanitized_command: None,
            match_start: 11,
            match_end: 19,
        };
        let score = compute_match_confidence(&ctx);
        // Should have execution operators nearby signal
        assert!(
            score
                .signals
                .contains(&ConfidenceSignal::ExecutionOperatorsNearby),
            "Should detect pipe operator"
        );
    }

    #[test]
    fn test_command_position_detection() {
        assert!(is_command_position("rm -rf /", 0));
        assert!(is_command_position("echo foo | rm -rf /", 11));
        assert!(is_command_position("foo && rm -rf /", 7));
        assert!(!is_command_position("git commit -m 'rm'", 15));
    }

    #[test]
    fn test_confidence_signal_weights() {
        assert!(ConfidenceSignal::ExecutedSpan.weight() >= 1.0);
        assert!(ConfidenceSignal::DataSpan.weight() < 0.5);
        assert!(ConfidenceSignal::CommentSpan.weight() < 0.1);
    }

    #[test]
    fn test_should_warn_threshold() {
        let mut score = ConfidenceScore::high();
        assert!(!score.should_warn(), "High confidence should not warn");

        score.add_signal(ConfidenceSignal::DataSpan);
        assert!(score.should_warn(), "Low confidence should warn");
    }

    #[test]
    fn test_utf8_multibyte_handling() {
        // Test that we don't panic with multi-byte UTF-8 characters before the
        // match. The emoji "🔥" is 4 bytes, and the 20-byte look-behind from the
        // match start (byte 13) saturates to the start of the string.
        let command = "🔥🔥🔥 rm -rf /";
        // "🔥" is 4 bytes each, so "🔥🔥🔥 " is 13 bytes (12 + 1 space)
        // "rm -rf /" starts at byte 13
        let ctx = ConfidenceContext {
            command,
            sanitized_command: None,
            match_start: 13, // Start of "rm"
            match_end: 21,   // End of "rm -rf /"
        };
        // This should not panic
        let score = compute_match_confidence(&ctx);
        assert!(score.value > 0.0, "Should compute a valid score");
    }

    #[test]
    fn test_operators_nearby_with_unicode() {
        // Test execution operator detection with unicode in prefix
        let command = "écho café | rm -rf /";
        // Each "é" is 2 bytes, so "écho café | " is 14 bytes and "rm" starts at byte 14
        let result = has_execution_operators_nearby(command, 14, 22);
        assert!(
            result,
            "Should detect pipe operator even with unicode prefix"
        );
    }
}