destructive_command_guard/
confidence.rs1use crate::context::{CommandSpans, SpanKind, classify_command};
36use smallvec::SmallVec;
37
/// A single piece of evidence about the context in which a pattern match was found.
///
/// Each signal carries a numeric [`weight`](Self::weight) and a human-readable
/// [`description`](Self::description).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ConfidenceSignal {
    /// The match is in executed code.
    ExecutedSpan,
    /// The match is in inline code (`bash -c`, `python -c`, etc.).
    InlineCodeSpan,
    /// The match is in a data string (single-quoted).
    DataSpan,
    /// The match is in a string argument to a safe command.
    ArgumentSpan,
    /// The match is in a comment.
    CommentSpan,
    /// The match is in a heredoc body.
    HeredocBodySpan,
    /// The context of the match is ambiguous.
    UnknownSpan,
    /// The match was in a region masked by sanitization.
    SanitizedRegion,
    /// Execution operators (`|`, `;`, `&&`) were found near the match.
    ExecutionOperatorsNearby,
    /// The match is at command position.
    CommandPosition,
    /// The match is in argument position.
    ArgumentPosition,
}

impl ConfidenceSignal {
    /// Returns the numeric weight associated with this signal.
    ///
    /// Weights above `1.0` belong to signals that raise confidence, `1.0` is
    /// neutral, and weights below `1.0` lower it. The arms are listed from the
    /// strongest boost to the strongest reduction.
    #[must_use]
    pub const fn weight(self) -> f32 {
        match self {
            Self::ExecutionOperatorsNearby | Self::CommandPosition => 1.1,
            Self::InlineCodeSpan | Self::ExecutedSpan => 1.0,
            Self::UnknownSpan => 0.8,
            Self::HeredocBodySpan => 0.7,
            Self::ArgumentPosition => 0.6,
            Self::ArgumentSpan => 0.3,
            Self::SanitizedRegion => 0.2,
            Self::DataSpan => 0.1,
            Self::CommentSpan => 0.05,
        }
    }

    /// Returns a short, static, human-readable explanation of this signal.
    #[must_use]
    pub const fn description(self) -> &'static str {
        match self {
            // Span-kind signals.
            Self::ExecutedSpan => "match is in executed code",
            Self::InlineCodeSpan => "match is in inline code (bash -c, python -c, etc.)",
            Self::DataSpan => "match is in a data string (single-quoted)",
            Self::ArgumentSpan => "match is in a string argument to a safe command",
            Self::CommentSpan => "match is in a comment",
            Self::HeredocBodySpan => "match is in a heredoc body",
            Self::UnknownSpan => "match context is ambiguous",
            // Sanitization and surrounding-context signals.
            Self::SanitizedRegion => "match was in a region masked by sanitization",
            Self::ExecutionOperatorsNearby => "execution operators (|, ;, &&) found nearby",
            // Positional signals.
            Self::CommandPosition => "match is at command position",
            Self::ArgumentPosition => "match is in argument position",
        }
    }
}
106
107#[derive(Debug, Clone)]
109pub struct ConfidenceScore {
110 pub value: f32,
113 pub signals: SmallVec<[ConfidenceSignal; 4]>,
115}
116
117impl Default for ConfidenceScore {
118 fn default() -> Self {
119 Self::high()
120 }
121}
122
123impl ConfidenceScore {
124 #[must_use]
126 pub fn high() -> Self {
127 Self {
128 value: 1.0,
129 signals: SmallVec::new(),
130 }
131 }
132
133 #[must_use]
135 pub fn low(signal: ConfidenceSignal) -> Self {
136 let mut signals = SmallVec::new();
137 signals.push(signal);
138 Self {
139 value: signal.weight(),
140 signals,
141 }
142 }
143
144 pub fn add_signal(&mut self, signal: ConfidenceSignal) {
146 self.signals.push(signal);
147 self.value = (self.value * signal.weight()).clamp(0.0, 1.0);
149 }
150
151 #[must_use]
153 pub fn is_low(&self, threshold: f32) -> bool {
154 self.value < threshold
155 }
156
157 #[must_use]
161 pub fn should_warn(&self) -> bool {
162 self.is_low(DEFAULT_WARN_THRESHOLD)
163 }
164}
165
/// Threshold used by `ConfidenceScore::should_warn`: a score whose value is
/// strictly below this is reported as low confidence.
pub const DEFAULT_WARN_THRESHOLD: f32 = 0.5;
168
/// Input to `compute_match_confidence`: a command and the location of a match
/// inside it.
pub struct ConfidenceContext<'a> {
    /// The command text the match was found in.
    pub command: &'a str,
    /// Sanitized version of the command, when one is available. It is compared
    /// with `command` over the match range.
    pub sanitized_command: Option<&'a str>,
    /// Byte offset in `command` where the match starts.
    pub match_start: usize,
    /// Byte offset in `command` where the match ends (exclusive).
    pub match_end: usize,
}
180
181#[must_use]
186pub fn compute_match_confidence(ctx: &ConfidenceContext<'_>) -> ConfidenceScore {
187 let mut score = ConfidenceScore::high();
188
189 if let Some(sanitized) = ctx.sanitized_command {
191 if ctx.match_start < sanitized.len()
192 && ctx.match_end <= sanitized.len()
193 && sanitized != ctx.command
194 {
195 let original_slice = ctx.command.get(ctx.match_start..ctx.match_end);
197 let sanitized_slice = sanitized.get(ctx.match_start..ctx.match_end);
198
199 if original_slice != sanitized_slice {
200 score.add_signal(ConfidenceSignal::SanitizedRegion);
202 }
203 }
204 }
205
206 let spans = classify_command(ctx.command);
208 let signal = classify_match_span(&spans, ctx.match_start, ctx.match_end);
209 score.add_signal(signal);
210
211 if has_execution_operators_nearby(ctx.command, ctx.match_start, ctx.match_end) {
213 score.add_signal(ConfidenceSignal::ExecutionOperatorsNearby);
214 }
215
216 if is_command_position(ctx.command, ctx.match_start) {
218 score.add_signal(ConfidenceSignal::CommandPosition);
219 } else {
220 score.add_signal(ConfidenceSignal::ArgumentPosition);
221 }
222
223 score
224}
225
226fn classify_match_span(
228 spans: &CommandSpans,
229 match_start: usize,
230 match_end: usize,
231) -> ConfidenceSignal {
232 for span in spans.spans() {
234 if span.byte_range.start <= match_start && match_end <= span.byte_range.end {
235 return match span.kind {
236 SpanKind::Executed => ConfidenceSignal::ExecutedSpan,
237 SpanKind::InlineCode => ConfidenceSignal::InlineCodeSpan,
238 SpanKind::Data => ConfidenceSignal::DataSpan,
239 SpanKind::Argument => ConfidenceSignal::ArgumentSpan,
240 SpanKind::Comment => ConfidenceSignal::CommentSpan,
241 SpanKind::HeredocBody => ConfidenceSignal::HeredocBodySpan,
242 SpanKind::Unknown => ConfidenceSignal::UnknownSpan,
243 };
244 }
245 }
246
247 ConfidenceSignal::UnknownSpan
250}
251
/// Reports whether a shell execution operator appears close to the match.
///
/// Inspects roughly 20 bytes of `command` immediately before `match_start` and
/// immediately after `match_end`, looking for any of `|`, `;`, `&&`, `||`,
/// `$(` or a backtick.
///
/// The window edges are snapped outward to the nearest UTF-8 character
/// boundary. Without this, an edge landing inside a multi-byte character makes
/// the slice lookup fail and the whole side is silently skipped. Offsets that
/// are out of range or not on a character boundary themselves yield an empty
/// window on that side instead of a panic.
fn has_execution_operators_nearby(command: &str, match_start: usize, match_end: usize) -> bool {
    /// Number of bytes inspected on each side of the match.
    const WINDOW: usize = 20;
    const OPERATORS: [&str; 6] = ["|", ";", "&&", "||", "$(", "`"];

    // Clamp to the string length first so the boundary walk is at most three
    // steps (a UTF-8 character is at most four bytes). Index 0 is always a
    // boundary, so the loop terminates.
    let mut window_start = match_start.saturating_sub(WINDOW).min(command.len());
    while !command.is_char_boundary(window_start) {
        window_start -= 1;
    }
    let before = command.get(window_start..match_start).unwrap_or("");

    // `saturating_add` avoids overflow for very large offsets; `command.len()`
    // is always a boundary, so this loop terminates as well.
    let mut window_end = match_end.saturating_add(WINDOW).min(command.len());
    while !command.is_char_boundary(window_end) {
        window_end += 1;
    }
    let after = command.get(match_end..window_end).unwrap_or("");

    OPERATORS
        .iter()
        .any(|op| before.contains(*op) || after.contains(*op))
}
276
/// Reports whether the text starting at `match_start` sits where a shell would
/// expect a command name.
///
/// That is the case when nothing but whitespace precedes the match, or when
/// the preceding non-whitespace text ends with `|`, `;`, `(`, a backtick, or
/// `&&`. The single-character checks also cover `||` and `$(`.
///
/// Returns `false`, rather than panicking, when `match_start` is past the end
/// of `command` or does not fall on a UTF-8 character boundary, because no
/// prefix can be inspected in that case.
fn is_command_position(command: &str, match_start: usize) -> bool {
    // Checked slicing: direct indexing would panic on an invalid offset.
    let prefix = match command.get(..match_start) {
        Some(prefix) => prefix,
        None => return false,
    };

    let preceding = prefix.trim_end();
    match preceding.chars().next_back() {
        // Only whitespace (or nothing) before the match.
        None => true,
        Some(last) => matches!(last, '|' | ';' | '(' | '`') || preceding.ends_with("&&"),
    }
}
299
300#[must_use]
305pub fn should_downgrade_to_warn(ctx: &ConfidenceContext<'_>) -> (ConfidenceScore, bool) {
306 let score = compute_match_confidence(ctx);
307 let downgrade = score.should_warn();
308 (score, downgrade)
309}
310
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a context from the given parts and scores it.
    fn score_for(
        command: &str,
        sanitized_command: Option<&str>,
        match_start: usize,
        match_end: usize,
    ) -> ConfidenceScore {
        compute_match_confidence(&ConfidenceContext {
            command,
            sanitized_command,
            match_start,
            match_end,
        })
    }

    #[test]
    fn test_high_confidence_executed_command() {
        let confidence = score_for("rm -rf /", None, 0, 8);
        assert!(
            confidence.value > 0.5,
            "Direct command should have high confidence"
        );
    }

    #[test]
    fn test_low_confidence_in_commit_message() {
        let confidence = score_for(
            "git commit -m 'Fix rm -rf detection'",
            Some("git commit -m ''"),
            18,
            31,
        );
        assert!(
            confidence.value < 0.5,
            "Match in sanitized commit message should have low confidence: {}",
            confidence.value
        );
    }

    #[test]
    fn test_confidence_with_pipe_operator() {
        let confidence = score_for("echo foo | rm -rf /", None, 11, 19);
        let saw_operator = confidence
            .signals
            .contains(&ConfidenceSignal::ExecutionOperatorsNearby);
        assert!(saw_operator, "Should detect pipe operator");
    }

    #[test]
    fn test_command_position_detection() {
        // Start of input, after a pipe, and after `&&` are command positions.
        assert!(is_command_position("rm -rf /", 0));
        assert!(is_command_position("echo foo | rm -rf /", 11));
        assert!(is_command_position("foo && rm -rf /", 7));
        // Inside a quoted argument is not.
        assert!(!is_command_position("git commit -m 'rm'", 15));
    }

    #[test]
    fn test_confidence_signal_weights() {
        assert!(ConfidenceSignal::ExecutedSpan.weight() >= 1.0);
        assert!(ConfidenceSignal::DataSpan.weight() < 0.5);
        assert!(ConfidenceSignal::CommentSpan.weight() < 0.1);
    }

    #[test]
    fn test_should_warn_threshold() {
        let mut confidence = ConfidenceScore::high();
        assert!(!confidence.should_warn(), "High confidence should not warn");

        confidence.add_signal(ConfidenceSignal::DataSpan);
        assert!(confidence.should_warn(), "Low confidence should warn");
    }

    #[test]
    fn test_utf8_multibyte_handling() {
        // Three 4-byte emoji plus a space put the match at byte offset 13.
        let confidence = score_for("🔥🔥🔥 rm -rf /", None, 13, 21);
        assert!(confidence.value > 0.0, "Should compute a valid score");
    }

    #[test]
    fn test_operators_nearby_with_unicode() {
        let found = has_execution_operators_nearby("écho café | rm -rf /", 14, 22);
        assert!(
            found,
            "Should detect pipe operator even with unicode prefix"
        );
    }
}