1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
//! `requirements.txt` detector: unpinned-dependency findings and
//! capability inference (NetworkAccess for known network libs,
//! ProcessExecution for known exec libs).
use std::path::Path;
use crate::artifact_graph::{ArtifactCapability, ArtifactCapabilityFact};
use crate::findings::{
ArtifactKind, EvidenceKind, Finding, MatchTarget, RecommendedAction, Severity, ThreatCategory,
};
use crate::services::artifact_orchestration::manifests::strip_inline_hash_comment;
use crate::services::artifact_orchestration::ArtifactOrchestratorService;
use super::{parse_python_dep_name, PYTHON_EXEC_DEPS, PYTHON_NETWORK_DEPS, PYTHON_VCS_PREFIXES};
pub(crate) fn analyze_requirements_txt(path: &Path, content: &str) -> Vec<Finding> {
let artifact_path = path.display().to_string();
content
.lines()
// `requirements.txt` uses `#` for comments. `;` is the PEP 508
// environment-marker separator (`requests; python_version>='3.6'`)
// and MUST NOT be treated as a comment marker — that's why we use
// the `#`-only stripper here, not the INI variant.
.map(|line| strip_inline_hash_comment(line).trim())
.filter(|line| !line.is_empty())
.filter(|line| !line.starts_with("-r ") && !line.starts_with("--requirement"))
.filter(|line| !line.starts_with("git+") && !line.starts_with("http"))
.filter(|line| !line.starts_with("-c ") && !line.starts_with("--"))
// `==` and `~=` are pinning operators; `!=` is exclusion
// (`requests!=2.0` means "any version except 2.0") and is NOT a
// pin — pre-fix it was lumped in here, so deps that combined an
// exclusion with no real pin (e.g. `requests!=2.0`) silently
// escaped MANIFEST_REQUIREMENTS_UNPINNED_DEP.
.filter(|line| !line.contains("==") && !line.contains("~="))
.map(|line| {
Finding::builder(
"MANIFEST_REQUIREMENTS_UNPINNED_DEP",
ThreatCategory::SupplyChain,
)
.severity(Severity::Low)
.action(RecommendedAction::Log)
.evidence_kind(EvidenceKind::Context)
.matched_on(MatchTarget::ReferencedFile {
path: artifact_path.clone(),
})
.artifact(ArtifactKind::PackageManifest, Some(artifact_path.clone()))
.match_value(line)
.reason("Python requirement is not strictly pinned")
.build()
})
.collect()
}
pub(crate) fn requirements_txt_capabilities(content: &str) -> Vec<ArtifactCapabilityFact> {
let mut capabilities = Vec::new();
for raw_line in content.lines() {
// VCS specs use `#egg=NAME` as a meaningful URL fragment, NOT a
// comment — strip-on-`#` would discard the only place the package
// name appears. Keep the line verbatim when it starts with a VCS
// scheme, and strip the inline `#` comment everywhere else so
// `requests # http client` still resolves to `requests`. `;` is
// kept as a splitter, not a comment marker, so the PEP 508 env
// marker (`requests; python_version>='3.6'`) cuts at the right spot.
let trimmed = raw_line.trim();
let line = if PYTHON_VCS_PREFIXES.iter().any(|p| trimmed.starts_with(p)) {
trimmed
} else {
strip_inline_hash_comment(raw_line).trim()
};
if line.is_empty() {
continue;
}
let Some(dep_name) = parse_python_dep_name(line) else {
continue;
};
if PYTHON_NETWORK_DEPS.iter().any(|d| dep_name == *d) {
capabilities.push(ArtifactOrchestratorService::observed_capability(
ArtifactCapability::NetworkAccess,
));
}
if PYTHON_EXEC_DEPS.iter().any(|d| dep_name == *d) {
capabilities.push(ArtifactOrchestratorService::observed_capability(
ArtifactCapability::ProcessExecution,
));
}
}
// `dedup_by_key` only collapses adjacent runs; deps emit interleaved
// capabilities (e.g. requests, fabric, httpx, invoke), so sort first.
capabilities.sort_by_key(|c| c.capability);
capabilities.dedup_by_key(|c| c.capability);
capabilities
}
#[cfg(test)]
mod tests {
use super::*;
fn capability_present(caps: &[ArtifactCapabilityFact], target: ArtifactCapability) -> bool {
caps.iter().any(|fact| fact.capability == target)
}
fn finding_present(findings: &[Finding], rule_id: &str) -> bool {
findings.iter().any(|finding| finding.rule_id == rule_id)
}
/// Contract: an inline `#` comment in `requirements.txt` is the
/// comment portion. The pre-fix code applied the `!line.contains("==")`
/// filter to the whole line including the comment, so a line like
/// `requests # was: requests==2.0.0` was silently classified as
/// pinned (the historical `==` in the comment satisfied the filter)
/// and the real `requests` (unpinned) was never reported.
#[test]
fn analyze_requirements_txt_does_not_treat_comment_pin_as_pin() {
let content = "requests # was: requests==2.0.0\n";
let path = std::path::Path::new("/pkg/requirements.txt");
let findings = analyze_requirements_txt(path, content);
assert!(
finding_present(&findings, "MANIFEST_REQUIREMENTS_UNPINNED_DEP"),
"real unpinned dep must fire even when a comment contains `==`; got {findings:?}",
);
}
/// Contract: a real `==` pin (no surrounding comment) suppresses the
/// unpinned finding. Pins the positive case so the inline-comment
/// strip doesn't accidentally widen the unpinned set.
#[test]
fn analyze_requirements_txt_respects_real_pin() {
let content = "requests==2.31.0\n";
let path = std::path::Path::new("/pkg/requirements.txt");
let findings = analyze_requirements_txt(path, content);
assert!(
!finding_present(&findings, "MANIFEST_REQUIREMENTS_UNPINNED_DEP"),
"real `==` pin must suppress the unpinned finding; got {findings:?}",
);
}
/// Contract: `!=` is exclusion, NOT a pin. A line like `requests!=2.0`
/// constrains the resolver to exclude 2.0 but still resolves the latest
/// compatible release at install time, so it MUST fire the unpinned
/// finding. Pre-fix the `.contains("!=")` clause silently swallowed
/// these lines as if they were pinned.
#[test]
fn analyze_requirements_txt_flags_not_equal_as_unpinned() {
let content = "requests!=2.0\n";
let path = std::path::Path::new("/pkg/requirements.txt");
let findings = analyze_requirements_txt(path, content);
assert!(
finding_present(&findings, "MANIFEST_REQUIREMENTS_UNPINNED_DEP"),
"`!=` exclusion is not a pin and MUST fire MANIFEST_REQUIREMENTS_UNPINNED_DEP; \
got {findings:?}",
);
}
/// Contract: a line that combines an exclusion with a real pin (e.g.
/// `requests==2.31.0,!=2.30.0`) is still pinned and does NOT fire.
/// Negative case to ensure the `!=` fix didn't accidentally inflate
/// the unpinned set when a `==` pin co-exists.
#[test]
fn analyze_requirements_txt_keeps_compound_pin_with_exclusion() {
let content = "requests==2.31.0,!=2.30.0\n";
let path = std::path::Path::new("/pkg/requirements.txt");
let findings = analyze_requirements_txt(path, content);
assert!(
!finding_present(&findings, "MANIFEST_REQUIREMENTS_UNPINNED_DEP"),
"compound spec containing `==` is pinned; must not fire; got {findings:?}",
);
}
/// Contract: `requirements_txt_capabilities` parses the dep name
/// after stripping any inline `#` comment, so `requests # http
/// client` still flips `NetworkAccess`. Pre-fix the whole line
/// (with comment) became the dep_name and the exact-match check
/// against `"requests"` failed silently.
#[test]
fn requirements_txt_capabilities_handles_inline_hash_comment() {
let content = "requests # http client\nsubprocess32 # back-port\n";
let caps = requirements_txt_capabilities(content);
assert!(
capability_present(&caps, ArtifactCapability::NetworkAccess),
"inline-commented `requests` must still flip NetworkAccess; got {caps:?}",
);
assert!(
capability_present(&caps, ArtifactCapability::ProcessExecution),
"inline-commented `subprocess32` must still flip ProcessExecution; got {caps:?}",
);
}
/// Contract: PEP 508 environment markers like `requests;
/// python_version=="3.6"` MUST still split the dep_name correctly —
/// for `requirements.txt` we treat `;` as a version-spec splitter,
/// NOT as a comment marker. Pins that the inline-`#` strip didn't
/// accidentally swap to the INI-style strip.
#[test]
fn requirements_txt_capabilities_keeps_pep508_env_marker_as_splitter() {
let content = "requests; python_version >= \"3.6\"\n";
let caps = requirements_txt_capabilities(content);
assert!(
capability_present(&caps, ArtifactCapability::NetworkAccess),
"PEP 508 marker must split dep_name correctly; got {caps:?}",
);
}
/// Contract: `parse_python_dep_name` recovers the package name from the
/// legacy VCS form `git+https://host/repo.git#egg=NAME` (also `hg+`,
/// `svn+`, `bzr+`). Pre-fix the split-on-`=` parser produced
/// `git+https://github.com/psf/requests.git#egg` as the dep_name, which
/// silently failed the exact-match check against `requests`, so VCS deps
/// for known network libs never flipped `NetworkAccess`.
#[test]
fn requirements_txt_capabilities_detects_vcs_egg_fragment() {
let content = "git+https://github.com/psf/requests.git#egg=requests\n";
let caps = requirements_txt_capabilities(content);
assert!(
capability_present(&caps, ArtifactCapability::NetworkAccess),
"VCS egg=requests must flip NetworkAccess; got {caps:?}",
);
}
/// Contract: PEP 508 direct reference `NAME @ URL` MUST extract `NAME`
/// even when the URL is a VCS spec. Without the ` @ ` split, the parser
/// falls back to splitting on `=` inside the URL and produces garbage.
#[test]
fn requirements_txt_capabilities_detects_pep508_direct_reference() {
let content = "requests @ git+https://github.com/psf/requests.git\n";
let caps = requirements_txt_capabilities(content);
assert!(
capability_present(&caps, ArtifactCapability::NetworkAccess),
"PEP 508 `NAME @ URL` must flip NetworkAccess; got {caps:?}",
);
}
/// Contract: VCS specs WITHOUT a recoverable name (no `egg=`, no PEP 508
/// prefix) are not silently mapped to a wrong package — they yield
/// `None` from `parse_python_dep_name` and contribute no capability.
/// Negative case to ensure the VCS branch doesn't over-fire.
#[test]
fn requirements_txt_capabilities_skips_vcs_without_name() {
let content = "git+https://github.com/example/internal-tool.git\n";
let caps = requirements_txt_capabilities(content);
assert!(
!capability_present(&caps, ArtifactCapability::NetworkAccess),
"anonymous VCS spec must not flip NetworkAccess; got {caps:?}",
);
}
/// # Contract
/// `requirements_txt_capabilities` must collapse capabilities to a unique
/// set even when matching deps are interleaved across the file. Pre-fix
/// `Vec::dedup_by_key` only removed adjacent duplicates, so a file like
/// `requests / fabric / httpx / invoke` produced `[Net, Exec, Net, Exec]`
/// (four facts) instead of `[Net, Exec]`, inflating the artifact graph
/// and the capability-bonus risk score.
#[test]
fn requirements_txt_capabilities_collapses_interleaved_capabilities() {
let content = "requests\nfabric\nhttpx\ninvoke\n";
let caps = requirements_txt_capabilities(content);
let net_count = caps
.iter()
.filter(|c| c.capability == ArtifactCapability::NetworkAccess)
.count();
let exec_count = caps
.iter()
.filter(|c| c.capability == ArtifactCapability::ProcessExecution)
.count();
assert_eq!(net_count, 1, "NetworkAccess must appear once; got {caps:?}");
assert_eq!(
exec_count, 1,
"ProcessExecution must appear once; got {caps:?}",
);
}
}