use std::collections::HashMap;
use std::path::Path;
use crate::parser_warn as warn;
use serde_json::json;
use crate::models::{DatasourceId, PackageData, PackageType};
use crate::parsers::utils::read_file_to_string;
use super::PackageParser;
use super::license_normalization::normalize_spdx_declared_license;
const PACKAGE_TYPE: PackageType = PackageType::Docker;
const OCI_LABEL_PREFIX: &str = "org.opencontainers.image.";
fn default_package_data() -> PackageData {
PackageData {
package_type: Some(PACKAGE_TYPE),
primary_language: Some("Dockerfile".to_string()),
datasource_id: Some(DatasourceId::Dockerfile),
..Default::default()
}
}
pub struct DockerfileParser;
impl PackageParser for DockerfileParser {
const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
fn is_match(path: &Path) -> bool {
path.file_name()
.and_then(|name| name.to_str())
.map(|name| name.to_ascii_lowercase())
.is_some_and(|name| {
matches!(
name.as_str(),
"dockerfile" | "containerfile" | "containerfile.core"
)
})
}
fn extract_packages(path: &Path) -> Vec<PackageData> {
let content = match read_file_to_string(path) {
Ok(content) => content,
Err(error) => {
warn!("Failed to read Dockerfile {:?}: {}", path, error);
return vec![default_package_data()];
}
};
vec![parse_dockerfile(&content)]
}
}
pub(crate) fn parse_dockerfile(content: &str) -> PackageData {
let oci_labels = extract_oci_labels(content);
let extra_data = (!oci_labels.is_empty())
.then(|| HashMap::from([("oci_labels".to_string(), json!(oci_labels))]));
let extracted_license_statement = oci_labels.get("org.opencontainers.image.licenses").cloned();
let (declared_license_expression, declared_license_expression_spdx, license_detections) =
normalize_spdx_declared_license(extracted_license_statement.as_deref());
PackageData {
package_type: Some(PACKAGE_TYPE),
primary_language: Some("Dockerfile".to_string()),
datasource_id: Some(DatasourceId::Dockerfile),
name: oci_labels.get("org.opencontainers.image.title").cloned(),
description: oci_labels
.get("org.opencontainers.image.description")
.cloned(),
homepage_url: oci_labels.get("org.opencontainers.image.url").cloned(),
vcs_url: oci_labels.get("org.opencontainers.image.source").cloned(),
version: oci_labels.get("org.opencontainers.image.version").cloned(),
declared_license_expression,
declared_license_expression_spdx,
license_detections,
extracted_license_statement,
extra_data,
..Default::default()
}
}
fn extract_oci_labels(content: &str) -> HashMap<String, String> {
let mut labels = HashMap::new();
for instruction in logical_lines(content) {
let trimmed = instruction.trim_start();
if !starts_with_instruction(trimmed, "LABEL") {
continue;
}
parse_label_instruction(trimmed[5..].trim_start(), &mut labels);
}
labels
}
fn logical_lines(content: &str) -> Vec<String> {
let mut lines = Vec::new();
let mut current = String::new();
for raw_line in content.lines() {
let line = raw_line.trim_end();
let trimmed = line.trim();
if current.is_empty() && (trimmed.is_empty() || trimmed.starts_with('#')) {
continue;
}
let has_continuation = ends_with_unescaped_backslash(line);
let segment = if has_continuation {
let mut without_backslash = line.trim_end().to_string();
without_backslash.pop();
without_backslash.trim().to_string()
} else {
trimmed.to_string()
};
if !segment.is_empty() {
if !current.is_empty() {
current.push(' ');
}
current.push_str(&segment);
}
if !has_continuation && !current.is_empty() {
lines.push(current.trim().to_string());
current.clear();
}
}
if !current.is_empty() {
lines.push(current.trim().to_string());
}
lines
}
fn ends_with_unescaped_backslash(line: &str) -> bool {
let trailing = line.chars().rev().take_while(|char| *char == '\\').count();
trailing % 2 == 1
}
fn starts_with_instruction(line: &str, instruction: &str) -> bool {
if line.len() < instruction.len()
|| !line[..instruction.len()].eq_ignore_ascii_case(instruction)
{
return false;
}
line.chars()
.nth(instruction.len())
.is_none_or(|next| next.is_whitespace())
}
fn parse_label_instruction(rest: &str, labels: &mut HashMap<String, String>) {
let tokens = tokenize_label_arguments(rest);
if tokens.is_empty() {
return;
}
if tokens.first().is_some_and(|token| token.contains('=')) {
for token in tokens {
let Some((key, value)) = token.split_once('=') else {
continue;
};
let key = key.trim();
if key.starts_with(OCI_LABEL_PREFIX) {
labels.insert(key.to_string(), value.trim().to_string());
}
}
return;
}
if let Some((key, values)) = tokens.split_first()
&& key.starts_with(OCI_LABEL_PREFIX)
{
labels.insert(key.to_string(), values.join(" ").trim().to_string());
}
}
fn tokenize_label_arguments(input: &str) -> Vec<String> {
let mut tokens = Vec::new();
let mut current = String::new();
let mut chars = input.chars().peekable();
let mut quote: Option<char> = None;
while let Some(ch) = chars.next() {
match quote {
Some(current_quote) => {
if ch == '\\' {
if let Some(next) = chars.next() {
current.push(next);
}
} else if ch == current_quote {
quote = None;
} else {
current.push(ch);
}
}
None => match ch {
'"' | '\'' => quote = Some(ch),
'\\' => {
if let Some(next) = chars.next() {
current.push(next);
}
}
whitespace if whitespace.is_whitespace() => {
if !current.is_empty() {
tokens.push(std::mem::take(&mut current));
}
}
_ => current.push(ch),
},
}
}
if !current.is_empty() {
tokens.push(current);
}
tokens
}
crate::register_parser!(
"Dockerfile or Containerfile OCI image metadata",
&[
"**/Dockerfile",
"**/dockerfile",
"**/Containerfile",
"**/containerfile",
"**/Containerfile.core",
"**/containerfile.core",
],
"docker",
"Dockerfile",
Some("https://github.com/opencontainers/image-spec/blob/main/annotations.md"),
);