use crate::detectors::{ContextAnalyzer, PatternDetector};
use crate::models::{Context, Finding, Location};
use crate::scan_warn;
use anyhow::Result;
use std::path::PathBuf;
/// Upper bound, in bytes (2 MiB), on the size of an archive entry that will be
/// read and scanned; larger entries are skipped by the export scan.
const MAX_FILE_SIZE: usize = 2 * 1024 * 1024;
/// File extensions (lowercase, without the leading dot) that `is_binary_path`
/// treats as non-scannable content.
///
/// The list also contains a few text-based formats (`svg`, `map`); they are
/// skipped just like the true binary formats.
const BINARY_EXTENSIONS: &[&str] = &[
"zip", "tar", "gz", "bz2", "xz", "7z", "rar", "zst", "lz4", "exe", "dll", "so", "dylib",
"bin", "jpg", "jpeg", "png", "gif", "bmp", "ico", "svg", "webp", "tiff", "avif", "mp3",
"mp4", "avi", "mov", "mkv", "flac", "wav", "ogg", "webm", "woff", "woff2", "ttf", "eot",
"otf", "pdf", "doc", "docx", "xls", "xlsx", "ppt", "pptx", "dat", "db", "sqlite", "sqlite3",
"parquet", "arrow", "iso", "img", "dmg", "vmdk", "map", "o", "a", "lib", "obj", "class",
"pyc",
];
/// Directory prefixes whose contents are never scanned.
///
/// Each prefix is compared with `starts_with` against the entry path exactly as
/// it is stored in the exported archive, so the prefixes are written relative
/// to the filesystem root (no leading `/`).
const SKIP_PREFIXES: &[&str] = &[
"usr/share/doc/",
"usr/share/man/",
"usr/share/locale/",
"usr/share/zoneinfo/",
"usr/share/i18n/",
"usr/share/terminfo/",
"usr/lib/",
"usr/lib64/",
"lib/",
"lib64/",
"proc/",
"sys/",
"dev/",
"run/",
"var/cache/",
"var/log/",
];
/// Root-relative paths of system CA certificate bundle locations that are
/// excluded from scanning.
///
/// Most entries name a single file; the last two end in `/` and name whole
/// directories.
const SKIP_FILES: &[&str] = &[
"etc/ssl/certs/ca-certificates.crt",
"etc/ssl/certs/ca-bundle.crt",
"etc/ssl/cert.pem",
"etc/pki/tls/certs/ca-bundle.crt",
"etc/pki/tls/certs/ca-bundle.trust.crt",
"etc/pki/ca-trust/extracted/pem/tls-ca-bundle.pem",
"etc/pki/ca-trust/extracted/openssl/ca-bundle.trust.crt",
"usr/share/ca-certificates/",
"etc/ca-certificates/",
];
/// Scans the filesystem of a Docker image for secrets.
///
/// Configure it with the `with_*` builder methods, then call `scan`.
pub struct DockerScanner {
/// Image reference to scan, as given by the caller.
image: String,
/// Entropy threshold forwarded to the pattern detector for every line.
entropy_threshold: f64,
/// Extra detection patterns; when empty the detector is built without them.
custom_patterns: Vec<crate::config::settings::CustomPattern>,
/// Whether `scan` attempts to pull the image before creating a container.
pull: bool,
}
impl DockerScanner {
/// Creates a scanner for `image` with the default configuration: an entropy
/// threshold of `3.5`, no custom patterns, and pulling enabled.
pub fn new(image: String) -> Self {
    let entropy_threshold = 3.5;
    let custom_patterns = Vec::new();
    let pull = true;
    Self {
        image,
        entropy_threshold,
        custom_patterns,
        pull,
    }
}
pub fn with_entropy_threshold(mut self, threshold: f64) -> Self {
self.entropy_threshold = threshold;
self
}
pub fn with_custom_patterns(
mut self,
patterns: Vec<crate::config::settings::CustomPattern>,
) -> Self {
self.custom_patterns = patterns;
self
}
pub fn with_pull(mut self, pull: bool) -> Self {
self.pull = pull;
self
}
/// Scans the configured image and returns every finding in its filesystem.
///
/// The scan proceeds in four steps:
/// 1. connect to the local Docker daemon;
/// 2. if pulling is enabled, try to pull the image (a pull failure is only a
///    warning — the scan continues with whatever image the daemon already has);
/// 3. create a container from the image (it is never started) and scan its
///    exported filesystem;
/// 4. force-remove the container, whether or not the scan succeeded.
///
/// # Errors
///
/// Fails when the daemon cannot be reached, when the container cannot be
/// created, or when the export scan itself fails. A failure to remove the
/// temporary container is reported as a warning and does not change the result.
pub async fn scan(&self) -> Result<Vec<Finding>> {
    use bollard::Docker;
    use futures_util::StreamExt;

    let docker = Docker::connect_with_local_defaults().map_err(|e| {
        anyhow::anyhow!(
            "Cannot connect to Docker daemon: {}\nMake sure Docker is running.",
            e
        )
    })?;

    if self.pull {
        use bollard::image::CreateImageOptions;

        let (repo, tag) = parse_image_ref(&self.image);
        let opts = CreateImageOptions {
            from_image: repo,
            tag,
            ..Default::default()
        };
        let mut stream = docker.create_image(Some(opts), None, None);
        // Drain the progress stream; the first error aborts the pull but not the scan.
        while let Some(item) = stream.next().await {
            if let Err(e) = item {
                scan_warn!("docker", "pull failed for {}: {} (trying local)", self.image, e);
                break;
            }
        }
    }

    // The entrypoint is overridden with a harmless command; the container only
    // exists so that its filesystem can be exported.
    let container_config = bollard::container::Config {
        image: Some(self.image.clone()),
        entrypoint: Some(vec!["/bin/true".to_string()]),
        ..Default::default()
    };
    let container = docker
        .create_container::<String, String>(None, container_config)
        .await
        .map_err(|e| {
            anyhow::anyhow!(
                "Cannot create container from image {}: {}\nIs the image available locally or pullable?",
                self.image,
                e
            )
        })?;
    let container_id = container.id;

    // Keep the result aside so cleanup runs on both the success and error paths.
    let result = self.scan_container_export(&docker, &container_id).await;

    let remove_opts = bollard::container::RemoveContainerOptions {
        force: true,
        ..Default::default()
    };
    if let Err(e) = docker
        .remove_container(&container_id, Some(remove_opts))
        .await
    {
        // Cleanup is best-effort, but a leaked container should not go unnoticed.
        scan_warn!(
            "docker",
            "failed to remove temporary container {}: {}",
            container_id,
            e
        );
    }

    result
}
/// Exports the filesystem of `container_id` as a tar archive and scans every
/// eligible text file in it.
///
/// An entry is scanned only if it is a regular file, is not under a skipped
/// prefix, is not a skipped CA-bundle path, does not have a binary extension,
/// is non-empty and at most `MAX_FILE_SIZE` bytes, has no NUL byte in its first
/// 512 bytes, and is valid UTF-8. Findings are reported under a virtual
/// `docker://<image>/<path>` path.
///
/// Errors on the export stream or on individual entries are logged as warnings
/// and the scan continues with what was received.
///
/// # Errors
///
/// Fails only when the archive's entry iterator cannot be created.
async fn scan_container_export(
    &self,
    docker: &bollard::Docker,
    container_id: &str,
) -> Result<Vec<Finding>> {
    use futures_util::StreamExt;
    use std::io::Read;

    let detector = if self.custom_patterns.is_empty() {
        PatternDetector::new()
    } else {
        PatternDetector::with_custom_patterns(&self.custom_patterns)
    };

    // NOTE(review): the whole export is buffered in memory before parsing, so
    // peak memory grows with the size of the container filesystem.
    let mut tar_bytes: Vec<u8> = Vec::new();
    let mut stream = docker.export_container(container_id);
    while let Some(chunk) = stream.next().await {
        match chunk {
            Ok(bytes) => tar_bytes.extend_from_slice(&bytes),
            Err(e) => {
                scan_warn!("docker", "error reading export stream: {}", e);
                break;
            }
        }
    }

    let mut findings: Vec<Finding> = Vec::new();
    let mut archive = tar::Archive::new(tar_bytes.as_slice());
    for entry_result in archive.entries()? {
        let mut entry = match entry_result {
            Ok(entry) => entry,
            Err(e) => {
                scan_warn!("docker", "tar entry error: {}", e);
                continue;
            }
        };
        if entry.header().entry_type() != tar::EntryType::Regular {
            continue;
        }
        let path_str = match entry.path() {
            Ok(p) => p.to_string_lossy().into_owned(),
            Err(_) => continue,
        };

        if SKIP_PREFIXES.iter().any(|pfx| path_str.starts_with(pfx)) {
            continue;
        }
        // Entries ending in '/' are directories and match by prefix; all other
        // entries are single files and must match exactly, so that e.g.
        // "etc/ssl/cert.pem.key" is still scanned.
        let is_skipped_file = SKIP_FILES.iter().any(|f| {
            if f.ends_with('/') {
                path_str.starts_with(f)
            } else {
                path_str == *f
            }
        });
        if is_skipped_file || is_binary_path(&path_str) {
            continue;
        }

        // Compare in u64 so an oversized entry cannot wrap around on targets
        // where usize is narrower than 64 bits.
        let size = entry.header().size().unwrap_or(0);
        if size == 0 || size > MAX_FILE_SIZE as u64 {
            continue;
        }
        // `size` is at most MAX_FILE_SIZE here, so it always fits in usize.
        let mut content_bytes = Vec::with_capacity(size as usize);
        if entry.read_to_end(&mut content_bytes).is_err() {
            continue;
        }
        // A NUL byte near the start is taken as a sign of binary content.
        if content_bytes.iter().take(512).any(|&b| b == 0) {
            continue;
        }
        let content = match String::from_utf8(content_bytes) {
            Ok(text) => text,
            Err(_) => continue,
        };

        let virtual_path = PathBuf::from(format!("docker://{}/{}", self.image, path_str));
        match self.scan_text_content(&content, &virtual_path, &detector) {
            Ok(file_findings) => findings.extend(file_findings),
            Err(e) => {
                scan_warn!("docker", "scan error for {}: {}", path_str, e);
            }
        }
    }
    Ok(findings)
}
/// Runs the pattern detector over every line of `content` and turns each match
/// into a `Finding` located at `virtual_path`.
///
/// Lines flagged as placeholders and lines longer than 5000 bytes are skipped.
/// A match on a comment line has its confidence scaled by 0.75, and every
/// match's severity is adjusted using the neighbouring lines and the file-level
/// context. Line numbers in the resulting locations are 1-based.
fn scan_text_content(
    &self,
    content: &str,
    virtual_path: &std::path::Path,
    detector: &PatternDetector,
) -> Result<Vec<Finding>> {
    let file_context = ContextAnalyzer::analyze_file(virtual_path);
    let all_lines: Vec<&str> = content.lines().collect();
    let mut results = Vec::new();

    for (idx, text) in all_lines.iter().enumerate() {
        if ContextAnalyzer::is_placeholder(text) || text.len() > 5000 {
            continue;
        }
        let in_comment = ContextAnalyzer::is_comment(text);

        for hit in detector.scan_line_with_positions(text, self.entropy_threshold) {
            let mut secret = hit.secret;
            if in_comment {
                secret.confidence *= 0.75;
            }

            // Neighbouring lines, where they exist, feed the severity adjustment.
            let previous = idx.checked_sub(1).map(|i| all_lines[i].to_string());
            let following = all_lines.get(idx + 1).map(|l| l.to_string());

            let severity_context = Context {
                line_before: previous.clone(),
                line_content: text.to_string(),
                line_after: following.clone(),
                is_test_file: file_context.is_test_file,
                is_config_file: file_context.is_config_file,
                is_documentation: file_context.is_documentation,
                file_extension: file_context.file_extension.clone(),
            };
            secret.severity = ContextAnalyzer::adjust_severity(secret.severity, &severity_context);

            let location = Location {
                file_path: virtual_path.to_path_buf(),
                line_number: idx + 1,
                column_start: hit.column_start,
                column_end: hit.column_end,
                commit_hash: None,
                commit_author: None,
                commit_date: None,
            };
            let context = ContextAnalyzer::build_context(
                text.to_string(),
                previous,
                following,
                &file_context,
            );
            results.push(Finding::new(secret, location, context));
        }
    }

    Ok(results)
}
}
/// Splits an image reference into the `(repository, tag)` pair used for pulling.
///
/// * A reference containing `@` (a digest) is returned whole as the repository
///   with an empty tag.
/// * Otherwise the text after the last `:` is the tag, unless it contains a
///   `/`, in which case the colon belongs to a `host:port` registry prefix and
///   the whole reference is the repository.
/// * A missing or empty tag (`"nginx"`, `"nginx:"`) becomes `"latest"`. An empty
///   tag must not reach the daemon, because the Docker Engine API treats an
///   empty tag on a plain repository as "pull every tag".
fn parse_image_ref(image: &str) -> (String, String) {
    const DEFAULT_TAG: &str = "latest";

    if image.contains('@') {
        return (image.to_string(), String::new());
    }

    match image.rsplit_once(':') {
        // The last colon is part of "host:port/...", not a tag separator.
        Some((_, after)) if after.contains('/') => (image.to_string(), DEFAULT_TAG.to_string()),
        // Trailing colon: no tag was actually given.
        Some((repo, "")) => (repo.to_string(), DEFAULT_TAG.to_string()),
        Some((repo, tag)) => (repo.to_string(), tag.to_string()),
        None => (image.to_string(), DEFAULT_TAG.to_string()),
    }
}
/// Reports whether `path` ends in an extension listed in `BINARY_EXTENSIONS`.
///
/// The extension is whatever follows the last `.` anywhere in the path string,
/// lowercased before the lookup. A path without any `.` is never binary.
fn is_binary_path(path: &str) -> bool {
    match path.rsplit_once('.') {
        Some((_, suffix)) => {
            let lowered = suffix.to_lowercase();
            BINARY_EXTENSIONS.contains(&lowered.as_str())
        }
        None => false,
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// A plain `name:tag` reference splits at the colon.
    #[test]
    fn test_parse_image_ref_simple() {
        let (repo, tag) = parse_image_ref("nginx:1.25");
        assert_eq!(repo, "nginx");
        assert_eq!(tag, "1.25");
    }

    /// A reference without a tag defaults to `latest`.
    #[test]
    fn test_parse_image_ref_latest() {
        let (repo, tag) = parse_image_ref("nginx");
        assert_eq!(repo, "nginx");
        assert_eq!(tag, "latest");
    }

    /// A registry host and organisation path stay in the repository part.
    #[test]
    fn test_parse_image_ref_registry() {
        let (repo, tag) = parse_image_ref("ghcr.io/org/app:v2");
        assert_eq!(repo, "ghcr.io/org/app");
        assert_eq!(tag, "v2");
    }

    /// A registry port is not mistaken for a tag, with or without a real tag.
    #[test]
    fn test_parse_image_ref_registry_port() {
        let (repo, tag) = parse_image_ref("registry:5000/myapp:latest");
        assert_eq!(repo, "registry:5000/myapp");
        assert_eq!(tag, "latest");

        let (repo, tag) = parse_image_ref("registry:5000/myapp");
        assert_eq!(repo, "registry:5000/myapp");
        assert_eq!(tag, "latest");
    }

    /// A digest reference is passed through whole, with an empty tag.
    #[test]
    fn test_parse_image_ref_digest() {
        let (repo, tag) = parse_image_ref("nginx@sha256:abc123");
        assert_eq!(repo, "nginx@sha256:abc123");
        assert_eq!(tag, "");
    }

    /// Known binary extensions are detected; ordinary text files are not.
    #[test]
    fn test_binary_path_detection() {
        assert!(is_binary_path("usr/bin/app.exe"));
        assert!(is_binary_path("opt/data/image.png"));
        assert!(!is_binary_path("etc/nginx/nginx.conf"));
        assert!(!is_binary_path("app/config.yaml"));
    }

    /// Extension matching ignores case.
    #[test]
    fn test_binary_path_case_insensitive() {
        assert!(is_binary_path("opt/data/IMAGE.PNG"));
        assert!(is_binary_path("opt/data/Archive.Tar"));
    }

    /// Paths without an extension, or with a dot only in a directory name,
    /// are not treated as binary.
    #[test]
    fn test_binary_path_without_extension() {
        assert!(!is_binary_path("usr/bin/app"));
        assert!(!is_binary_path("etc/conf.d/settings"));
    }
}