1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
#![cfg_attr(coverage_nightly, coverage(off))]
//! CB-950 Series: YAML Best Practices Detection
//!
//! Pattern-based YAML defect detection for `pmat comply check`.
//! Targets CI/CD configurations, Kubernetes manifests, and IaC files.
//! Based on: YAML 1.2 spec (Ben-Kiki, Evans & döt Net, 2009), OWASP secret detection.
use super::types::*;
use std::fs;
use std::path::{Path, PathBuf};
/// Directory names to skip when walking for YAML files.
///
/// The entries cover version-control and tool metadata (`.git`, `.claude`,
/// `.pmat`), dependency and toolchain caches (`node_modules`, `vendor`,
/// `.venv`, `__pycache__`, `.lake`, `.elan`), build outputs (`target`,
/// `build`, `dist`), and generated code (`generated`).
///
/// NOTE(review): the walker that consumes this list is not defined in this
/// part of the file; presumably each entry is compared against a single
/// directory name rather than a full path — confirm in the file walker.
const SKIP_DIRS: &[&str] = &[
".git",
".claude",
"node_modules",
"target",
".pmat",
"vendor",
"build",
"dist",
"__pycache__",
".venv",
".lake", // Lean 4 package cache (submodule dependencies)
".elan", // Lean toolchain
"generated", // Code generated by pv scaffold
];
/// YAML "truthy" strings that cause subtle bugs when unquoted.
///
/// These are the `yes`/`no`/`on`/`off`/`y`/`n` spellings, each listed in
/// lowercase, Capitalized, and UPPERCASE form. The YAML 1.1 boolean type
/// accepts all of them, whereas the YAML 1.2 core schema treats them as plain
/// strings, so the same unquoted scalar can change type depending on the
/// parser that reads the file.
///
/// Note: true/false/True/False/TRUE/FALSE are EXCLUDED because they are the
/// canonical YAML 1.2 boolean values. Quoting them changes the type from bool
/// to string, which breaks parsers that expect native booleans.
const TRUTHY_STRINGS: &[&str] = &[
"yes", "no", "on", "off", "Yes", "No", "On", "Off", "YES", "NO", "ON", "OFF", "y", "n", "Y",
"N",
];
/// YAML keys that legitimately require native booleans (not quoted strings).
///
/// Grouped by the schema each key comes from:
/// GitHub Actions: `if`, `fail-fast`, `continue-on-error`, `required`, `cancel-in-progress`
/// GitLab CI: `allow_failure`
/// Kubernetes: `readOnly`, `privileged`
/// PMAT roadmap schema: `active`, `draft`
///
/// NOTE(review): entries are spelled exactly as the key appears in each
/// schema (e.g. camelCase `readOnly`); whether the consumer compares them
/// case-sensitively is not visible here — confirm in the detectors.
const NATIVE_BOOLEAN_KEYS: &[&str] = &[
// GitHub Actions
"if",
"fail-fast",
"continue-on-error",
"required",
"cancel-in-progress",
// GitLab CI
"allow_failure",
// Kubernetes
"readOnly",
"privileged",
// PMAT roadmap schema (boolean fields parsed as native bool)
"active",
"draft",
];
/// Secret-indicating key patterns (case-insensitive).
///
/// Every entry is written in lowercase. Compound names are listed in each
/// separator style that occurs in YAML keys — `snake_case`, joined, and
/// `kebab-case` — so that, for example, `api_key`, `apikey`, and `api-key`
/// are all recognised. The `access` family previously lacked its hyphenated
/// form, which left keys such as `aws-access-key-id` unmatched; `access-key`
/// closes that gap.
///
/// NOTE(review): the matching itself (lowercasing the key, substring vs.
/// exact comparison) happens in the detectors, which are not visible here.
const SECRET_KEY_PATTERNS: &[&str] = &[
"password",
"secret",
"token",
"api_key",
"apikey",
"api-key",
"private_key",
"privatekey",
"private-key",
"access_key",
"accesskey",
"access-key",
"aws_secret",
"credentials",
"auth_token",
];
/// Known non-secret keys that contain secret-pattern substrings (e.g. "token").
/// These are common ML/LLM inference parameters and permission scopes, not credentials.
///
/// Every entry here contains the substring "token", which is one of the
/// secret-indicating patterns; listing a key exempts it from being reported
/// as a hardcoded secret.
///
/// NOTE(review): whether a key must equal an entry exactly or merely contain
/// it is decided by the detectors, which are not visible here — confirm
/// before adding short entries such as `tokenizer`.
const SECRET_KEY_ALLOWLIST: &[&str] = &[
// LLM inference parameters
"max_tokens",
"num_tokens",
"context_tokens",
"token_limit",
"total_tokens",
"completion_tokens",
"prompt_tokens",
"max_output_tokens",
"max_new_tokens",
"token_count",
"tokens_per_second",
// ML model architecture fields (tokenizer / embedding config)
"eos_token",
"bos_token",
"pad_token",
"unk_token",
"sep_token",
"cls_token",
"mask_token",
"embed_tokens",
"token_type_embeddings",
"token_embeddings",
"stop_tokens",
"special_tokens",
"added_tokens",
"additional_special_tokens",
"token_type_ids",
"min_tokens",
"min_token_l2",
"saves_per_token",
"tokens_per_batch",
"token_dim",
"token_vocab_size",
"vocab_token",
"tokenizer",
// GitHub Actions permission scopes (not secrets)
"id-token",
"id_token",
];
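// =============================================================================
// Included source files
// (`include!` inlines each file's contents into this module, so the code in
// them shares the imports and constants declared above.)
// =============================================================================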
include!("yaml_bp_file_walking.rs");
include!("yaml_bp_detectors.rs");