1use serde::{Deserialize, Serialize};
5use std::borrow::Cow;
6use std::collections::HashMap;
7use std::sync::Arc;
8
9use crate::Severity;
10
11#[derive(Clone, Serialize, Deserialize)]
22pub struct RawMatch {
23 #[serde(with = "serde_arc_str")]
25 pub detector_id: Arc<str>,
26 #[serde(with = "serde_arc_str")]
28 pub detector_name: Arc<str>,
29 #[serde(with = "serde_arc_str")]
31 pub service: Arc<str>,
32 pub severity: Severity,
34 #[serde(with = "serde_arc_str")]
36 pub credential: Arc<str>,
37 pub credential_hash: String,
39 pub companions: std::collections::HashMap<String, String>,
41 pub location: MatchLocation,
43 #[serde(skip_serializing_if = "Option::is_none")]
45 pub entropy: Option<f64>,
46 #[serde(skip_serializing_if = "Option::is_none")]
48 pub confidence: Option<f64>,
49}
50
51impl RawMatch {
52 pub fn sanitize_floats(mut self) -> Self {
56 if self.entropy.is_some_and(f64::is_nan) {
57 self.entropy = None;
58 }
59 if self.confidence.is_some_and(f64::is_nan) {
60 self.confidence = None;
61 }
62 self
63 }
64}
65
66impl PartialEq for RawMatch {
67 fn eq(&self, other: &Self) -> bool {
68 self.detector_id == other.detector_id
73 && self.detector_name == other.detector_name
74 && self.service == other.service
75 && self.severity == other.severity
76 && self.credential == other.credential
77 && self.credential_hash == other.credential_hash
78 && self.companions == other.companions
79 && self.location == other.location
80 && opt_f64_total_eq(self.entropy, other.entropy)
81 && opt_f64_total_eq(self.confidence, other.confidence)
82 }
83}
84
85impl Eq for RawMatch {}
86
87impl std::fmt::Debug for RawMatch {
88 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
92 f.debug_struct("RawMatch")
93 .field("detector_id", &self.detector_id)
94 .field("detector_name", &self.detector_name)
95 .field("service", &self.service)
96 .field("severity", &self.severity)
97 .field(
98 "credential",
99 &format_args!("<redacted {} bytes>", self.credential.len()),
100 )
101 .field("credential_hash", &self.credential_hash)
102 .field(
103 "companions",
104 &format_args!("<{} redacted companions>", self.companions.len()),
105 )
106 .field("location", &self.location)
107 .field("entropy", &self.entropy)
108 .field("confidence", &self.confidence)
109 .finish()
110 }
111}
112
/// Equality for optional floats based on `f64::total_cmp`.
///
/// Two `None`s are equal, a `None` never equals a `Some`, and two `Some`s are
/// equal exactly when `total_cmp` reports `Equal` (so NaN equals an identical
/// NaN, while `0.0` and `-0.0` differ).
#[inline]
fn opt_f64_total_eq(a: Option<f64>, b: Option<f64>) -> bool {
    match a.zip(b) {
        Some((lhs, rhs)) => lhs.total_cmp(&rhs).is_eq(),
        // At least one side is absent: equal only when both are.
        None => a.is_none() && b.is_none(),
    }
}
121
122impl PartialOrd for RawMatch {
123 fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
124 Some(self.cmp(other))
125 }
126}
127
128impl Ord for RawMatch {
129 fn cmp(&self, other: &Self) -> std::cmp::Ordering {
130 let self_conf = self.confidence.unwrap_or(0.0);
132 let other_conf = other.confidence.unwrap_or(0.0);
133
134 match other_conf.total_cmp(&self_conf) {
135 std::cmp::Ordering::Equal => {}
136 ord => return ord,
137 }
138
139 match other.severity.cmp(&self.severity) {
141 std::cmp::Ordering::Equal => {}
142 ord => return ord,
143 }
144
145 match self.detector_id.cmp(&other.detector_id) {
147 std::cmp::Ordering::Equal => self.credential.cmp(&other.credential),
148 ord => ord,
149 }
150 }
151}
152
153#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
155pub struct MatchLocation {
156 #[serde(with = "serde_arc_str")]
158 pub source: Arc<str>,
159 #[serde(with = "serde_arc_str_opt")]
165 pub file_path: Option<Arc<str>>,
166 pub line: Option<usize>,
168 pub offset: usize,
170 #[serde(with = "serde_arc_str_opt")]
172 pub commit: Option<Arc<str>>,
173 #[serde(with = "serde_arc_str_opt")]
175 pub author: Option<Arc<str>>,
176 #[serde(with = "serde_arc_str_opt")]
178 pub date: Option<Arc<str>>,
179}
180
181#[derive(Debug, Clone, Serialize, Deserialize)]
183pub struct VerifiedFinding {
184 #[serde(with = "serde_arc_str")]
186 pub detector_id: Arc<str>,
187 #[serde(with = "serde_arc_str")]
189 pub detector_name: Arc<str>,
190 #[serde(with = "serde_arc_str")]
192 pub service: Arc<str>,
193 pub severity: Severity,
195 pub credential_redacted: Cow<'static, str>,
197 pub credential_hash: String,
199 pub location: MatchLocation,
201 pub verification: VerificationResult,
203 pub metadata: HashMap<String, String>,
205 pub additional_locations: Vec<MatchLocation>,
207 #[serde(skip_serializing_if = "Option::is_none")]
209 pub confidence: Option<f64>,
210}
211
212#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
214#[serde(rename_all = "snake_case")]
215pub enum VerificationResult {
216 Live,
218 Revoked,
220 Dead,
222 RateLimited,
224 Error(String),
226 Unverifiable,
228 Skipped,
230}
231
232impl RawMatch {
233 pub fn deduplication_key(&self) -> (&str, &str) {
235 (&self.detector_id, &self.credential)
236 }
237
238 pub fn to_redacted(&self) -> RedactedFinding {
243 RedactedFinding {
244 detector_id: self.detector_id.clone(),
245 detector_name: self.detector_name.clone(),
246 service: self.service.clone(),
247 severity: self.severity,
248 credential_redacted: crate::redact(&self.credential),
249 credential_hash: self.credential_hash.clone(),
250 companions_redacted: self
251 .companions
252 .iter()
253 .map(|(k, v)| (k.clone(), crate::redact(v).into_owned()))
254 .collect(),
255 location: self.location.clone(),
256 entropy: self.entropy,
257 confidence: self.confidence,
258 }
259 }
260}
261
262#[derive(Debug, Clone, Serialize, Deserialize)]
266pub struct RedactedFinding {
267 #[serde(with = "serde_arc_str")]
268 pub detector_id: Arc<str>,
269 #[serde(with = "serde_arc_str")]
270 pub detector_name: Arc<str>,
271 #[serde(with = "serde_arc_str")]
272 pub service: Arc<str>,
273 pub severity: Severity,
274 pub credential_redacted: Cow<'static, str>,
275 pub credential_hash: String,
276 pub companions_redacted: HashMap<String, String>,
277 pub location: MatchLocation,
278 #[serde(skip_serializing_if = "Option::is_none")]
279 pub entropy: Option<f64>,
280 #[serde(skip_serializing_if = "Option::is_none")]
281 pub confidence: Option<f64>,
282}
283
284pub mod serde_arc_str {
285 use serde::{Deserialize, Deserializer, Serialize, Serializer};
286 use std::sync::Arc;
287
288 pub fn serialize<S>(val: &Arc<str>, serializer: S) -> Result<S::Ok, S::Error>
289 where
290 S: Serializer,
291 {
292 val.as_ref().serialize(serializer)
293 }
294
295 pub fn deserialize<'de, D>(deserializer: D) -> Result<Arc<str>, D::Error>
296 where
297 D: Deserializer<'de>,
298 {
299 String::deserialize(deserializer).map(Arc::from)
300 }
301}
302
303pub mod serde_arc_str_opt {
304 use serde::{Deserialize, Deserializer, Serialize, Serializer};
305 use std::sync::Arc;
306
307 pub fn serialize<S>(val: &Option<Arc<str>>, serializer: S) -> Result<S::Ok, S::Error>
308 where
309 S: Serializer,
310 {
311 val.as_ref().map(|s| s.as_ref()).serialize(serializer)
312 }
313
314 pub fn deserialize<'de, D>(deserializer: D) -> Result<Option<Arc<str>>, D::Error>
315 where
316 D: Deserializer<'de>,
317 {
318 Option::<String>::deserialize(deserializer).map(|opt| opt.map(Arc::from))
319 }
320}
321
/// Checks that unusual file paths inside a `VerifiedFinding` still serialize
/// to JSON that `serde_json` can parse back.
#[cfg(test)]
mod hostile_metadata_tests {
    use super::*;
    use std::collections::HashMap;
    use std::sync::Arc;

    /// Builds a minimal finding whose only interesting field is `file_path`.
    fn finding_with_hostile_path(path: &str) -> VerifiedFinding {
        let location = MatchLocation {
            source: Arc::from("filesystem"),
            file_path: Some(Arc::from(path)),
            line: Some(1),
            offset: 0,
            commit: None,
            author: None,
            date: None,
        };

        VerifiedFinding {
            detector_id: Arc::from("test-detector"),
            detector_name: Arc::from("Test Detector"),
            service: Arc::from("test"),
            severity: Severity::Medium,
            credential_redacted: Cow::Borrowed("****"),
            credential_hash: "deadbeef".into(),
            location,
            verification: VerificationResult::Skipped,
            metadata: HashMap::new(),
            additional_locations: Vec::new(),
            confidence: Some(0.5),
        }
    }

    /// Serializes a finding built around `path` to a JSON string.
    fn json_for_path(path: &str) -> String {
        serde_json::to_string(&finding_with_hostile_path(path)).expect("serialize ok")
    }

    /// Parses `json` back into a generic JSON value.
    fn reparse(json: &str) -> serde_json::Value {
        serde_json::from_str(json).expect("parse ok")
    }

    #[test]
    fn nul_bytes_in_path_serialize_to_valid_json() {
        let json = json_for_path("evil\0name.env");
        assert!(json.contains("\\u0000"), "NUL must be escaped in JSON");
        assert!(reparse(&json).is_object());
    }

    #[test]
    fn control_chars_in_path_serialize_safely() {
        let json = json_for_path("path\r\nwith\x1b[31mANSI\x1bcontrol\tchars");
        // Each control character must appear in its escaped form.
        assert!(json.contains("\\r"));
        assert!(json.contains("\\n"));
        assert!(json.contains("\\t"));
        assert!(json.contains("\\u001b"));
        let _ = reparse(&json);
    }

    #[test]
    fn replacement_char_in_path_round_trips() {
        let json = json_for_path("name_\u{FFFD}_after");
        let parsed = reparse(&json);
        let recovered = parsed["location"]["file_path"].as_str().unwrap();
        assert!(recovered.contains('\u{FFFD}'));
    }

    #[test]
    fn extremely_long_path_does_not_panic() {
        let long = "a".repeat(1024 * 1024);
        let json = json_for_path(&long);
        assert!(json.len() > 1024 * 1024);
        let parsed = reparse(&json);
        assert_eq!(
            parsed["location"]["file_path"].as_str().unwrap().len(),
            long.len()
        );
    }
}