Skip to main content

provenant/parsers/
docker.rs

1use std::collections::HashMap;
2use std::path::Path;
3
4use crate::parser_warn as warn;
5use serde_json::json;
6
7use crate::models::{DatasourceId, PackageData, PackageType};
8use crate::parsers::utils::read_file_to_string;
9
10use super::PackageParser;
11use super::license_normalization::normalize_spdx_declared_license;
12
13const PACKAGE_TYPE: PackageType = PackageType::Docker;
14const OCI_LABEL_PREFIX: &str = "org.opencontainers.image.";
15
16fn default_package_data() -> PackageData {
17    PackageData {
18        package_type: Some(PACKAGE_TYPE),
19        primary_language: Some("Dockerfile".to_string()),
20        datasource_id: Some(DatasourceId::Dockerfile),
21        ..Default::default()
22    }
23}
24
25pub struct DockerfileParser;
26
27impl PackageParser for DockerfileParser {
28    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
29
30    fn is_match(path: &Path) -> bool {
31        path.file_name()
32            .and_then(|name| name.to_str())
33            .map(|name| name.to_ascii_lowercase())
34            .is_some_and(|name| {
35                matches!(
36                    name.as_str(),
37                    "dockerfile" | "containerfile" | "containerfile.core"
38                )
39            })
40    }
41
42    fn extract_packages(path: &Path) -> Vec<PackageData> {
43        let content = match read_file_to_string(path) {
44            Ok(content) => content,
45            Err(error) => {
46                warn!("Failed to read Dockerfile {:?}: {}", path, error);
47                return vec![default_package_data()];
48            }
49        };
50
51        vec![parse_dockerfile(&content)]
52    }
53}
54
55pub(crate) fn parse_dockerfile(content: &str) -> PackageData {
56    let oci_labels = extract_oci_labels(content);
57    let extra_data = (!oci_labels.is_empty())
58        .then(|| HashMap::from([("oci_labels".to_string(), json!(oci_labels))]));
59    let extracted_license_statement = oci_labels.get("org.opencontainers.image.licenses").cloned();
60    let (declared_license_expression, declared_license_expression_spdx, license_detections) =
61        normalize_spdx_declared_license(extracted_license_statement.as_deref());
62
63    PackageData {
64        package_type: Some(PACKAGE_TYPE),
65        primary_language: Some("Dockerfile".to_string()),
66        datasource_id: Some(DatasourceId::Dockerfile),
67        name: oci_labels.get("org.opencontainers.image.title").cloned(),
68        description: oci_labels
69            .get("org.opencontainers.image.description")
70            .cloned(),
71        homepage_url: oci_labels.get("org.opencontainers.image.url").cloned(),
72        vcs_url: oci_labels.get("org.opencontainers.image.source").cloned(),
73        version: oci_labels.get("org.opencontainers.image.version").cloned(),
74        declared_license_expression,
75        declared_license_expression_spdx,
76        license_detections,
77        extracted_license_statement,
78        extra_data,
79        ..Default::default()
80    }
81}
82
83fn extract_oci_labels(content: &str) -> HashMap<String, String> {
84    let mut labels = HashMap::new();
85
86    for instruction in logical_lines(content) {
87        let trimmed = instruction.trim_start();
88        if !starts_with_instruction(trimmed, "LABEL") {
89            continue;
90        }
91
92        parse_label_instruction(trimmed[5..].trim_start(), &mut labels);
93    }
94
95    labels
96}
97
98fn logical_lines(content: &str) -> Vec<String> {
99    let mut lines = Vec::new();
100    let mut current = String::new();
101
102    for raw_line in content.lines() {
103        let line = raw_line.trim_end();
104        let trimmed = line.trim();
105
106        if current.is_empty() && (trimmed.is_empty() || trimmed.starts_with('#')) {
107            continue;
108        }
109
110        let has_continuation = ends_with_unescaped_backslash(line);
111        let segment = if has_continuation {
112            let mut without_backslash = line.trim_end().to_string();
113            without_backslash.pop();
114            without_backslash.trim().to_string()
115        } else {
116            trimmed.to_string()
117        };
118
119        if !segment.is_empty() {
120            if !current.is_empty() {
121                current.push(' ');
122            }
123            current.push_str(&segment);
124        }
125
126        if !has_continuation && !current.is_empty() {
127            lines.push(current.trim().to_string());
128            current.clear();
129        }
130    }
131
132    if !current.is_empty() {
133        lines.push(current.trim().to_string());
134    }
135
136    lines
137}
138
/// Reports whether `line` ends in an odd number of consecutive backslashes.
///
/// An odd run means the final backslash is not itself escaped by the one
/// before it. Trailing whitespace is not skipped: a line ending in `\ ` does
/// not qualify.
fn ends_with_unescaped_backslash(line: &str) -> bool {
    // A backslash is one byte in UTF-8, so the length difference equals the
    // number of trailing backslashes.
    let without_trailing = line.trim_end_matches('\\');
    let trailing_backslashes = line.len() - without_trailing.len();
    trailing_backslashes % 2 == 1
}
143
/// Reports whether `line` begins with the keyword `instruction`, compared
/// ASCII case-insensitively and followed by whitespace or the end of the line.
///
/// `line` is expected to have no leading whitespace. Lines that are shorter
/// than the keyword, or in which the keyword-length byte offset falls inside
/// a multi-byte character, do not match. Checked slicing is used so such
/// lines return `false` instead of panicking.
fn starts_with_instruction(line: &str, instruction: &str) -> bool {
    let split = instruction.len();

    // `str::get` yields `None` both when `split` is out of range and when it
    // is not on a character boundary.
    let (Some(head), Some(rest)) = (line.get(..split), line.get(split..)) else {
        return false;
    };

    // The keyword must be a whole word: `LABELS` is not `LABEL`.
    head.eq_ignore_ascii_case(instruction)
        && rest.chars().next().is_none_or(char::is_whitespace)
}
155
156fn parse_label_instruction(rest: &str, labels: &mut HashMap<String, String>) {
157    let tokens = tokenize_label_arguments(rest);
158    if tokens.is_empty() {
159        return;
160    }
161
162    if tokens.first().is_some_and(|token| token.contains('=')) {
163        for token in tokens {
164            let Some((key, value)) = token.split_once('=') else {
165                continue;
166            };
167            let key = key.trim();
168            if key.starts_with(OCI_LABEL_PREFIX) {
169                labels.insert(key.to_string(), value.trim().to_string());
170            }
171        }
172        return;
173    }
174
175    if let Some((key, values)) = tokens.split_first()
176        && key.starts_with(OCI_LABEL_PREFIX)
177    {
178        labels.insert(key.to_string(), values.join(" ").trim().to_string());
179    }
180}
181
/// Splits `LABEL` argument text into whitespace-separated tokens.
///
/// * Single and double quotes group text, including whitespace, into the
///   surrounding token; the quote characters themselves are dropped.
/// * A backslash, inside or outside quotes, is dropped and the character
///   after it is taken literally. A trailing backslash with nothing after it
///   is discarded.
/// * An unterminated quote runs to the end of the input.
///
/// Empty tokens are never emitted.
fn tokenize_label_arguments(input: &str) -> Vec<String> {
    let mut tokens = Vec::new();
    let mut pending = String::new();
    let mut open_quote: Option<char> = None;
    let mut remaining = input.chars();

    while let Some(ch) = remaining.next() {
        match (open_quote, ch) {
            // Escapes behave the same in every quoting state.
            (_, '\\') => {
                if let Some(escaped) = remaining.next() {
                    pending.push(escaped);
                }
            }
            // Only the quote character that opened the span can close it.
            (Some(opener), closer) if closer == opener => open_quote = None,
            (Some(_), quoted) => pending.push(quoted),
            (None, '"' | '\'') => open_quote = Some(ch),
            (None, space) if space.is_whitespace() => {
                if !pending.is_empty() {
                    tokens.push(std::mem::take(&mut pending));
                }
            }
            (None, plain) => pending.push(plain),
        }
    }

    if !pending.is_empty() {
        tokens.push(pending);
    }

    tokens
}
224
225crate::register_parser!(
226    "Dockerfile or Containerfile OCI image metadata",
227    &[
228        "**/Dockerfile",
229        "**/dockerfile",
230        "**/Containerfile",
231        "**/containerfile",
232        "**/Containerfile.core",
233        "**/containerfile.core",
234    ],
235    "docker",
236    "Dockerfile",
237    Some("https://github.com/opencontainers/image-spec/blob/main/annotations.md"),
238);