Skip to main content

keyhog_core/
spec.rs

1//! Detector specification: TOML-based pattern definitions with regex, keywords,
2//! verification endpoints, and companion patterns.
3
4mod load;
5mod validate;
6
7use serde::{Deserialize, Serialize};
8use thiserror::Error;
9
10pub use load::{
11    load_detector_cache, load_detectors, load_detectors_with_gate, save_detector_cache,
12};
13pub use validate::{QualityIssue, validate_detector};
14
15/// A single detector specification, parsed from a TOML file.
16/// Each file in the `detectors/` directory produces one of these.
17///
18/// # Examples
19///
20/// ```rust
21/// use keyhog_core::{DetectorFile, DetectorSpec, PatternSpec, Severity};
22///
23/// let file = DetectorFile {
24///     detector: DetectorSpec {
25///         id: "demo-token".into(),
26///         name: "Demo Token".into(),
27///         service: "demo".into(),
28///         severity: Severity::High,
29///         patterns: vec![PatternSpec {
30///             regex: "demo_[A-Z0-9]{8}".into(),
31///             description: None,
32///             group: None,
33///         }],
34///         companion: None,
35///         verify: None,
36///         keywords: vec!["demo_".into()],
37///     },
38/// };
39///
40/// assert_eq!(file.detector.service, "demo");
41/// ```
42#[derive(Debug, Clone, Serialize, Deserialize)]
43pub struct DetectorFile {
44    /// Parsed detector payload from the TOML file.
45    pub detector: DetectorSpec,
46}
47
48/// Full detector definition loaded from TOML.
49///
50/// # Examples
51///
52/// ```rust
53/// use keyhog_core::{DetectorSpec, PatternSpec, Severity};
54///
55/// let spec = DetectorSpec {
56///     id: "demo-token".into(),
57///     name: "Demo Token".into(),
58///     service: "demo".into(),
59///     severity: Severity::High,
60///     patterns: vec![PatternSpec {
61///         regex: "demo_[A-Z0-9]{8}".into(),
62///         description: Some("Demo credential".into()),
63///         group: None,
64///     }],
65///     companion: None,
66///     verify: None,
67///     keywords: vec!["demo_".into()],
68/// };
69///
70/// assert_eq!(spec.patterns.len(), 1);
71/// ```
72#[derive(Debug, Clone, Serialize, Deserialize)]
73pub struct DetectorSpec {
74    /// Stable detector identifier.
75    pub id: String,
76    /// Human-readable detector name.
77    pub name: String,
78    /// Service namespace used for grouping and verification limits.
79    pub service: String,
80    /// Severity reported for matches from this detector.
81    pub severity: Severity,
82    /// One or more regex patterns that identify the credential.
83    pub patterns: Vec<PatternSpec>,
84    #[serde(default)]
85    /// Optional nearby companion requirement.
86    pub companion: Option<CompanionSpec>,
87    #[serde(default)]
88    /// Optional live-verification configuration.
89    pub verify: Option<VerifySpec>,
90    #[serde(default)]
91    /// Context keywords that help lower false positives.
92    pub keywords: Vec<String>,
93}
94
95/// One regex pattern entry inside a detector.
96///
97/// # Examples
98///
99/// ```rust
100/// use keyhog_core::PatternSpec;
101///
102/// let pattern = PatternSpec {
103///     regex: "(demo_[A-Z0-9]{8})".into(),
104///     description: Some("Capture the credential".into()),
105///     group: Some(1),
106/// };
107///
108/// assert_eq!(pattern.group, Some(1));
109/// ```
110#[derive(Debug, Clone, Serialize, Deserialize)]
111pub struct PatternSpec {
112    /// Regex used to detect the credential.
113    pub regex: String,
114    #[serde(default)]
115    /// Optional human-readable description for the pattern.
116    pub description: Option<String>,
117    #[serde(default)]
118    /// Capture group index to use as the credential payload.
119    pub group: Option<usize>,
120}
121
122/// A secondary pattern that must appear near the primary match.
123/// Example: AWS secret key found within 5 lines of an access key.
124///
125/// # Examples
126///
127/// ```rust
128/// use keyhog_core::CompanionSpec;
129///
130/// let companion = CompanionSpec {
131///     regex: "secret_[A-Z0-9]{8}".into(),
132///     within_lines: 3,
133///     name: "secret".into(),
134/// };
135///
136/// assert_eq!(companion.within_lines, 3);
137/// ```
138#[derive(Debug, Clone, Serialize, Deserialize)]
139pub struct CompanionSpec {
140    /// Regex used to locate the companion value.
141    pub regex: String,
142    #[serde(default = "default_within_lines")]
143    /// Search radius in lines around the primary match.
144    pub within_lines: usize,
145    /// Logical companion name used for interpolation.
146    pub name: String,
147}
148
/// Serde default for `CompanionSpec::within_lines`, used when the TOML omits the field.
fn default_within_lines() -> usize {
    const DEFAULT_WITHIN_LINES: usize = 5;
    DEFAULT_WITHIN_LINES
}
152
153/// Verification HTTP request and success criteria for a detector.
154///
155/// # Examples
156///
157/// ```rust
158/// use keyhog_core::{AuthSpec, HeaderSpec, HttpMethod, SuccessSpec, VerifySpec};
159///
160/// let verify = VerifySpec {
161///     method: HttpMethod::Get,
162///     url: "https://api.example.com/v1/me".into(),
163///     auth: AuthSpec::Bearer { field: "match".into() },
164///     headers: vec![HeaderSpec {
165///         name: "X-Client".into(),
166///         value: "keyhog".into(),
167///     }],
168///     body: None,
169///     success: SuccessSpec {
170///         status: Some(200),
171///         status_not: None,
172///         body_contains: None,
173///         body_not_contains: None,
174///         json_path: None,
175///         equals: None,
176///     },
177///     metadata: Vec::new(),
178///     timeout_ms: Some(2_000),
179/// };
180///
181/// assert_eq!(verify.timeout_ms, Some(2_000));
182/// ```
183#[derive(Debug, Clone, Serialize, Deserialize)]
184pub struct VerifySpec {
185    /// HTTP method to use for verification.
186    pub method: HttpMethod,
187    /// URL template for the verification request.
188    pub url: String,
189    /// Authentication scheme for the request.
190    pub auth: AuthSpec,
191    #[serde(default)]
192    /// Additional request headers.
193    pub headers: Vec<HeaderSpec>,
194    #[serde(default)]
195    /// Optional request body template.
196    pub body: Option<String>,
197    /// Success criteria for the response.
198    pub success: SuccessSpec,
199    #[serde(default)]
200    /// Metadata extraction rules for live responses.
201    pub metadata: Vec<MetadataSpec>,
202    #[serde(default)]
203    /// Optional per-detector timeout override in milliseconds.
204    pub timeout_ms: Option<u64>,
205}
206
207/// One extra request header to attach during verification.
208///
209/// # Examples
210///
211/// ```rust
212/// use keyhog_core::HeaderSpec;
213///
214/// let header = HeaderSpec {
215///     name: "X-Client".into(),
216///     value: "keyhog".into(),
217/// };
218///
219/// assert_eq!(header.name, "X-Client");
220/// ```
221#[derive(Debug, Clone, Serialize, Deserialize)]
222pub struct HeaderSpec {
223    /// Header name.
224    pub name: String,
225    /// Header value template.
226    pub value: String,
227}
228
229/// How to attach the credential to the verification request.
230/// The `field` values are interpolation references:
231///   - `"match"` — the primary matched credential
232///   - `"companion.<name>"` — a companion match
233///   - anything else — literal string
234///
235/// # Examples
236///
237/// ```rust
238/// use keyhog_core::AuthSpec;
239///
240/// let auth = AuthSpec::Bearer { field: "match".into() };
241/// assert!(matches!(auth, AuthSpec::Bearer { .. }));
242/// ```
243#[derive(Debug, Clone, Serialize, Deserialize)]
244#[serde(tag = "type", rename_all = "snake_case")]
245pub enum AuthSpec {
246    /// Send the request without explicit auth decoration.
247    None,
248    /// Put the resolved credential in an `Authorization: Bearer` header.
249    Bearer {
250        /// Interpolation field supplying the bearer token.
251        field: String,
252    },
253    /// Send HTTP basic auth.
254    Basic {
255        /// Username field or literal.
256        username: String,
257        /// Password field or literal.
258        password: String,
259    },
260    /// Put the credential into a custom header.
261    Header {
262        /// Header name.
263        name: String,
264        /// Header value template.
265        template: String,
266    },
267    /// Put the credential into a query parameter.
268    Query {
269        /// Query parameter name.
270        param: String,
271        /// Interpolation field supplying the parameter value.
272        field: String,
273    },
274    /// Use a lightweight AWS SigV4 liveness probe.
275    AwsV4 {
276        /// Access-key interpolation field.
277        access_key: String,
278        /// Secret-key interpolation field.
279        secret_key: String,
280        #[serde(default = "default_aws_region")]
281        /// AWS region for the probe.
282        region: String,
283        /// AWS service identifier to sign for.
284        service: String,
285    },
286}
287
/// Serde default for the `region` field of `AuthSpec::AwsV4`, used when the TOML omits it.
fn default_aws_region() -> String {
    String::from("us-east-1")
}
291
292/// Conditions that must ALL be true for verification to succeed.
293/// All fields are optional; present fields form an implicit AND.
294///
295/// # Examples
296///
297/// ```rust
298/// use keyhog_core::SuccessSpec;
299///
300/// let success = SuccessSpec {
301///     status: Some(200),
302///     status_not: Some(401),
303///     body_contains: Some("ok".into()),
304///     body_not_contains: None,
305///     json_path: None,
306///     equals: None,
307/// };
308///
309/// assert_eq!(success.status, Some(200));
310/// ```
311#[derive(Debug, Clone, Serialize, Deserialize)]
312pub struct SuccessSpec {
313    #[serde(default)]
314    /// Required HTTP status code.
315    pub status: Option<u16>,
316    #[serde(default)]
317    /// Forbidden HTTP status code.
318    pub status_not: Option<u16>,
319    #[serde(default)]
320    /// Substring that must appear in the response body.
321    pub body_contains: Option<String>,
322    #[serde(default)]
323    /// Substring that must not appear in the response body.
324    pub body_not_contains: Option<String>,
325    #[serde(default)]
326    /// JSON path that must resolve successfully.
327    pub json_path: Option<String>,
328    #[serde(default)]
329    /// Optional stringified value expected at `json_path`.
330    pub equals: Option<String>,
331}
332
333/// Metadata extraction rule applied to a verification response.
334///
335/// # Examples
336///
337/// ```rust
338/// use keyhog_core::MetadataSpec;
339///
340/// let metadata = MetadataSpec {
341///     name: "account_id".into(),
342///     json_path: Some("data.id".into()),
343///     header: None,
344///     regex: None,
345///     group: None,
346/// };
347///
348/// assert_eq!(metadata.name, "account_id");
349/// ```
350#[derive(Debug, Clone, Serialize, Deserialize)]
351pub struct MetadataSpec {
352    /// Output metadata key.
353    pub name: String,
354    #[serde(default)]
355    /// JSON path to extract from the response body.
356    pub json_path: Option<String>,
357    #[serde(default)]
358    /// Header name to extract when header capture is supported.
359    pub header: Option<String>,
360    #[serde(default)]
361    /// Optional regex applied to the extracted value.
362    pub regex: Option<String>,
363    #[serde(default)]
364    /// Optional capture-group index for the metadata regex.
365    pub group: Option<usize>,
366}
367
368/// Severity level attached to detector matches.
369///
370/// # Examples
371///
372/// ```rust
373/// use keyhog_core::Severity;
374///
375/// assert_eq!(Severity::Critical.to_string(), "critical");
376/// ```
377#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Deserialize, Serialize)]
378#[serde(rename_all = "lowercase")]
379pub enum Severity {
380    /// Informational finding.
381    Info,
382    /// Low-severity finding.
383    Low,
384    /// Medium-severity finding.
385    Medium,
386    /// High-severity finding.
387    High,
388    /// Critical finding.
389    Critical,
390}
391
392impl std::fmt::Display for Severity {
393    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
394        match self {
395            Self::Info => write!(f, "info"),
396            Self::Low => write!(f, "low"),
397            Self::Medium => write!(f, "medium"),
398            Self::High => write!(f, "high"),
399            Self::Critical => write!(f, "critical"),
400        }
401    }
402}
403
404/// HTTP methods supported by detector verification specs.
405///
406/// # Examples
407///
408/// ```rust
409/// use keyhog_core::HttpMethod;
410///
411/// assert!(matches!(HttpMethod::Post, HttpMethod::Post));
412/// ```
413#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
414#[serde(rename_all = "UPPERCASE")]
415pub enum HttpMethod {
416    /// HTTP GET.
417    Get,
418    /// HTTP POST.
419    Post,
420    /// HTTP PUT.
421    Put,
422    /// HTTP DELETE.
423    Delete,
424    /// HTTP HEAD.
425    Head,
426    /// HTTP PATCH.
427    Patch,
428}
429
430/// Errors that occur while loading detector specs from disk.
431///
432/// # Examples
433///
434/// ```rust
435/// use keyhog_core::SpecError;
436///
437/// let error = SpecError::ReadFile {
438///     path: "detectors/demo.toml".into(),
439///     source: std::io::Error::other("permission denied"),
440/// };
441/// assert!(error.to_string().contains("Fix"));
442/// ```
443#[derive(Debug, Error)]
444pub enum SpecError {
445    #[error(
446        "failed to read detector file {path}: {source}. Fix: check the detector path exists and that the file is readable TOML"
447    )]
448    ReadFile {
449        path: String,
450        source: std::io::Error,
451    },
452}
453
#[cfg(test)]
mod tests {
    use super::*;
    use regex::Regex;

    /// Parses a detector TOML document and compiles its companion regex.
    ///
    /// Panics (failing the calling test) when the TOML does not parse, the
    /// detector has no companion, or the companion regex does not compile.
    /// `label` only names the detector in the panic message.
    fn companion_regex(toml_src: &str, label: &str) -> Regex {
        let file: DetectorFile = toml::from_str(toml_src)
            .unwrap_or_else(|err| panic!("{label} detector should parse: {err}"));
        let companion = file
            .detector
            .companion
            .unwrap_or_else(|| panic!("{label} companion missing"));
        Regex::new(&companion.regex)
            .unwrap_or_else(|err| panic!("{label} companion regex should compile: {err}"))
    }

    /// A detector with `type = "bearer"` auth deserializes into `AuthSpec::Bearer`.
    #[test]
    fn parse_bearer_auth() {
        let toml_str = r#"
[detector]
id = "slack-bot-token"
name = "Slack Bot Token"
service = "slack"
severity = "critical"

[[detector.patterns]]
regex = "xoxb-[0-9]{10,13}-[0-9]{10,13}-[a-zA-Z0-9]{24}"

[detector.verify]
method = "POST"
url = "https://slack.com/api/auth.test"

[detector.verify.auth]
type = "bearer"
field = "match"

[detector.verify.success]
status = 200
json_path = "ok"
equals = "true"

[[detector.verify.metadata]]
name = "team"
json_path = "team"
"#;
        let file: DetectorFile = toml::from_str(toml_str).unwrap();
        assert_eq!(file.detector.id, "slack-bot-token");
        assert_eq!(file.detector.severity, Severity::Critical);
        let verify = file
            .detector
            .verify
            .expect("verify table should be present");
        assert!(matches!(verify.auth, AuthSpec::Bearer { .. }));
    }

    /// A detector with `type = "basic"` auth deserializes into `AuthSpec::Basic`.
    #[test]
    fn parse_basic_auth() {
        let toml_str = r#"
[detector]
id = "stripe-secret-key"
name = "Stripe Secret Key"
service = "stripe"
severity = "critical"

[[detector.patterns]]
regex = "sk_live_[a-zA-Z0-9]{24,}"

[detector.verify]
method = "GET"
url = "https://api.stripe.com/v1/charges?limit=1"

[detector.verify.auth]
type = "basic"
username = "match"
password = ""

[detector.verify.success]
status = 200
"#;
        let file: DetectorFile = toml::from_str(toml_str).unwrap();
        assert_eq!(file.detector.id, "stripe-secret-key");
        let verify = file
            .detector
            .verify
            .expect("verify table should be present");
        assert!(matches!(verify.auth, AuthSpec::Basic { .. }));
    }

    /// The `[detector.companion]` table round-trips its name and line radius.
    #[test]
    fn parse_companion_spec() {
        let toml_str = r#"
[detector]
id = "aws-access-key"
name = "AWS Access Key"
service = "aws"
severity = "critical"

[[detector.patterns]]
regex = "(AKIA|ASIA)[0-9A-Z]{16}"

[detector.companion]
regex = "[0-9a-zA-Z/+=]{40}"
within_lines = 5
name = "secret_key"

[detector.verify]
method = "GET"
url = "https://sts.amazonaws.com/?Action=GetCallerIdentity&Version=2011-06-15"

[detector.verify.auth]
type = "aws_v4"
access_key = "match"
secret_key = "companion.secret_key"
region = "us-east-1"
service = "sts"

[detector.verify.success]
status = 200
"#;
        let file: DetectorFile = toml::from_str(toml_str).unwrap();
        let comp = file
            .detector
            .companion
            .expect("companion table should be present");
        assert_eq!(comp.name, "secret_key");
        assert_eq!(comp.within_lines, 5);
    }

    /// `inject_github_classic_pat_detector` adds a `github-classic-pat` entry.
    #[test]
    fn injects_github_classic_pat_compat_detector() {
        let mut detectors = vec![DetectorSpec {
            id: "github-pat-fine-grained".into(),
            name: "GitHub Fine-Grained PAT".into(),
            service: "github".into(),
            severity: Severity::Critical,
            patterns: vec![PatternSpec {
                regex: "github_pat_[a-zA-Z0-9]{22}_[a-zA-Z0-9]{59}".into(),
                description: None,
                group: None,
            }],
            companion: None,
            verify: None,
            keywords: vec!["github_pat_".into(), "github".into()],
        }];

        load::inject_github_classic_pat_detector(&mut detectors);

        let compat = detectors
            .iter()
            .find(|d| d.id == "github-classic-pat")
            .expect("compat detector missing");
        assert_eq!(compat.service, "github");
        assert_eq!(compat.patterns[0].regex, "ghp_[a-zA-Z0-9]{36,40}");
    }

    /// The Supabase pattern matches only when the JWT is anchored by its variable name.
    #[test]
    fn supabase_anon_detector_requires_context_anchor() {
        let file: DetectorFile =
            toml::from_str(include_str!("../../../detectors/supabase-anon-key.toml"))
                .expect("supabase detector should parse");
        assert_eq!(file.detector.patterns.len(), 1);
        let regex = Regex::new(&file.detector.patterns[0].regex).unwrap();

        let bare_jwt = "eyJhbGciOiJIUzI1NiJ9.eyJyb2xlIjoiYW5vbiJ9.signature";
        assert!(regex.is_match(&format!("SUPABASE_ANON_KEY={bare_jwt}")));
        assert!(!regex.is_match(bare_jwt));
    }

    /// The Ceph companion regex rejects a bare secret without the `CEPH_SECRET_KEY=` context.
    #[test]
    fn ceph_companion_requires_ceph_secret_context() {
        let regex = companion_regex(
            include_str!("../../../detectors/ceph-rados-gateway-credentials.toml"),
            "ceph",
        );
        assert!(regex.is_match("CEPH_SECRET_KEY=abcdEFGHijklMNOPqrstUVWXyz0123456789/+=="));
        assert!(!regex.is_match("abcdEFGHijklMNOPqrstUVWXyz0123456789/+=="));
    }

    /// The second Lepton pattern needs Lepton-specific context around the token.
    #[test]
    fn lepton_secondary_pattern_needs_lepton_specific_context() {
        let file: DetectorFile =
            toml::from_str(include_str!("../../../detectors/leptonai-api-token.toml"))
                .expect("lepton detector should parse");
        let regex = Regex::new(&file.detector.patterns[1].regex).unwrap();
        assert!(regex.is_match("LEPTON_TOKEN=abcdefghijklmnopqrstuvwxyz123456 lepton.ai"));
        assert!(!regex.is_match("token=abcdefghijklmnopqrstuvwxyz123456 example.com"));
    }

    /// Infura verification uses basic auth: match as username, companion secret as password.
    #[test]
    fn infura_detector_uses_basic_auth_with_companion_secret() {
        let file: DetectorFile = toml::from_str(include_str!(
            "../../../detectors/infura-project-credentials.toml"
        ))
        .expect("infura detector should parse");
        let verify = file.detector.verify.expect("infura verify missing");
        match verify.auth {
            AuthSpec::Basic { username, password } => {
                assert_eq!(username, "match");
                assert_eq!(password, "companion.infura_project_secret");
            }
            other => panic!("unexpected auth spec: {other:?}"),
        }
    }

    /// The Retool detector ships without a `verify` section.
    #[test]
    fn retool_detector_is_unverifiable_without_deployment_domain() {
        let file: DetectorFile =
            toml::from_str(include_str!("../../../detectors/retool-api-key.toml"))
                .expect("retool detector should parse");
        assert!(file.detector.verify.is_none());
    }

    /// The AWS session token pattern matches only with the `AWS_SESSION_TOKEN=` anchor.
    #[test]
    fn aws_session_token_detector_requires_aws_specific_anchors() {
        let file: DetectorFile =
            toml::from_str(include_str!("../../../detectors/aws-session-token.toml"))
                .expect("aws session token detector should parse");
        assert!(file.detector.verify.is_none());
        let env_regex = Regex::new(&file.detector.patterns[0].regex).unwrap();

        // Bound once so the anchored and bare inputs cannot drift apart.
        let bare_token = "IQoJb3JpZ2luX2VjENP//////////wEaCXVzLWVhc3QtMSJGMEQCIBexampleTOKENexampleTOKENexampleTOKENexampleTOKEN";
        assert!(env_regex.is_match(&format!("AWS_SESSION_TOKEN={bare_token}")));
        assert!(!env_regex.is_match(bare_token));
    }

    /// The Secrets Manager ARN detector is `Info` severity and has no verification.
    #[test]
    fn aws_secrets_manager_arn_detector_is_info_only_and_unverified() {
        let file: DetectorFile = toml::from_str(include_str!(
            "../../../detectors/aws-secrets-manager-arn.toml"
        ))
        .expect("aws secrets manager arn detector should parse");
        assert_eq!(file.detector.id, "aws-secrets-manager-arn");
        assert_eq!(file.detector.severity, Severity::Info);
        assert!(file.detector.verify.is_none());
    }

    /// Vonage, Wix, and CodeCommit companion regexes all reject context-free values.
    #[test]
    fn tightened_companion_detectors_require_service_specific_context() {
        let vonage_companion = companion_regex(
            include_str!("../../../detectors/vonage-video-api.toml"),
            "vonage",
        );
        assert!(vonage_companion.is_match("VONAGE_API_SECRET=abcdef0123456789"));
        assert!(!vonage_companion.is_match("abcdef0123456789"));

        let wix_companion = companion_regex(
            include_str!("../../../detectors/wix-api-credentials.toml"),
            "wix",
        );
        assert!(wix_companion.is_match("wix instance_id=123e4567-e89b-12d3-a456-426614174000"));
        assert!(!wix_companion.is_match("123e4567-e89b-12d3-a456-426614174000"));

        let codecommit_companion = companion_regex(
            include_str!("../../../detectors/aws-codecommit-credentials.toml"),
            "codecommit",
        );
        assert!(
            codecommit_companion
                .is_match("CODECOMMIT_PASSWORD=AbCdEfGhIjKlMnOpQrStUvWxYz0123456789/+==")
        );
        assert!(!codecommit_companion.is_match("AbCdEfGhIjKlMnOpQrStUvWxYz0123456789/+=="));
    }
}