Skip to main content

provenant/parsers/
docker.rs

1use std::collections::HashMap;
2use std::path::Path;
3
4use crate::parser_warn as warn;
5use serde_json::json;
6
7use crate::models::{DatasourceId, PackageData, PackageType};
8use crate::parsers::utils::{MAX_ITERATION_COUNT, read_file_to_string, truncate_field};
9
10use super::PackageParser;
11use super::license_normalization::normalize_spdx_declared_license;
12
/// Package type stamped on every `PackageData` produced by this parser and
/// exposed as the parser's `PackageParser::PACKAGE_TYPE`.
const PACKAGE_TYPE: PackageType = PackageType::Docker;
/// Key prefix of the OCI image annotations; `LABEL` keys that do not start
/// with it are not collected.
const OCI_LABEL_PREFIX: &str = "org.opencontainers.image.";
15
16fn default_package_data() -> PackageData {
17    PackageData {
18        package_type: Some(PACKAGE_TYPE),
19        primary_language: Some("Dockerfile".to_string()),
20        datasource_id: Some(DatasourceId::Dockerfile),
21        ..Default::default()
22    }
23}
24
25pub struct DockerfileParser;
26
27impl PackageParser for DockerfileParser {
28    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
29
30    fn is_match(path: &Path) -> bool {
31        path.file_name()
32            .and_then(|name| name.to_str())
33            .map(|name| name.to_ascii_lowercase())
34            .is_some_and(|name| {
35                matches!(
36                    name.as_str(),
37                    "dockerfile" | "containerfile" | "containerfile.core"
38                )
39            })
40    }
41
42    fn extract_packages(path: &Path) -> Vec<PackageData> {
43        let content = match read_file_to_string(path, None) {
44            Ok(content) => content,
45            Err(error) => {
46                warn!("Failed to read Dockerfile {:?}: {}", path, error);
47                return vec![default_package_data()];
48            }
49        };
50
51        vec![parse_dockerfile(&content)]
52    }
53}
54
55pub(crate) fn parse_dockerfile(content: &str) -> PackageData {
56    let oci_labels = extract_oci_labels(content);
57    let extra_data = (!oci_labels.is_empty())
58        .then(|| HashMap::from([("oci_labels".to_string(), json!(oci_labels))]));
59    let extracted_license_statement = oci_labels.get("org.opencontainers.image.licenses").cloned();
60    let (declared_license_expression, declared_license_expression_spdx, license_detections) =
61        normalize_spdx_declared_license(extracted_license_statement.as_deref());
62
63    PackageData {
64        package_type: Some(PACKAGE_TYPE),
65        primary_language: Some("Dockerfile".to_string()),
66        datasource_id: Some(DatasourceId::Dockerfile),
67        name: oci_labels
68            .get("org.opencontainers.image.title")
69            .map(|v| truncate_field(v.clone())),
70        description: oci_labels
71            .get("org.opencontainers.image.description")
72            .map(|v| truncate_field(v.clone())),
73        homepage_url: oci_labels
74            .get("org.opencontainers.image.url")
75            .map(|v| truncate_field(v.clone())),
76        vcs_url: oci_labels
77            .get("org.opencontainers.image.source")
78            .map(|v| truncate_field(v.clone())),
79        version: oci_labels
80            .get("org.opencontainers.image.version")
81            .map(|v| truncate_field(v.clone())),
82        declared_license_expression,
83        declared_license_expression_spdx,
84        license_detections,
85        extracted_license_statement: extracted_license_statement.map(truncate_field),
86        extra_data,
87        ..Default::default()
88    }
89}
90
91fn extract_oci_labels(content: &str) -> HashMap<String, String> {
92    let mut labels = HashMap::new();
93
94    for instruction in logical_lines(content) {
95        let trimmed = instruction.trim_start();
96        if !starts_with_instruction(trimmed, "LABEL") {
97            continue;
98        }
99
100        parse_label_instruction(trimmed[5..].trim_start(), &mut labels);
101    }
102
103    labels
104}
105
106fn logical_lines(content: &str) -> Vec<String> {
107    let mut lines = Vec::new();
108    let mut current = String::new();
109    let mut iterations = 0usize;
110
111    for raw_line in content.lines() {
112        iterations += 1;
113        if iterations > MAX_ITERATION_COUNT {
114            warn!("logical_lines: exceeded MAX_ITERATION_COUNT, truncating");
115            break;
116        }
117        let line = raw_line.trim_end();
118        let trimmed = line.trim();
119
120        if current.is_empty() && (trimmed.is_empty() || trimmed.starts_with('#')) {
121            continue;
122        }
123
124        let has_continuation = ends_with_unescaped_backslash(line);
125        let segment = if has_continuation {
126            let mut without_backslash = line.trim_end().to_string();
127            without_backslash.pop();
128            without_backslash.trim().to_string()
129        } else {
130            trimmed.to_string()
131        };
132
133        if !segment.is_empty() {
134            if !current.is_empty() {
135                current.push(' ');
136            }
137            current.push_str(&segment);
138        }
139
140        if !has_continuation && !current.is_empty() {
141            lines.push(current.trim().to_string());
142            current.clear();
143        }
144    }
145
146    if !current.is_empty() {
147        lines.push(current.trim().to_string());
148    }
149
150    lines
151}
152
/// Reports whether `line` ends with a line-continuation backslash.
///
/// A trailing run of backslashes is read as escaped pairs, so only an odd
/// run length leaves one unescaped backslash at the very end.
fn ends_with_unescaped_backslash(line: &str) -> bool {
    let without_run = line.trim_end_matches('\\');
    // A backslash is a single byte, so the byte difference is the run length.
    let run_length = line.len() - without_run.len();
    run_length % 2 != 0
}
157
/// Reports whether `line` begins with the keyword `instruction`, compared
/// ASCII case-insensitively, as a whole word.
///
/// The keyword must be followed by whitespace or by the end of the line, so
/// `LABELS` does not match `LABEL`.
///
/// The prefix is taken with the checked `str::get`, which yields `None` both
/// when the line is too short and when the cut would land inside a multi-byte
/// UTF-8 character. Plain slicing panics in the latter case, for example on a
/// line such as `RUN é…` tested against `LABEL`.
fn starts_with_instruction(line: &str, instruction: &str) -> bool {
    let Some(keyword) = line.get(..instruction.len()) else {
        return false;
    };
    if !keyword.eq_ignore_ascii_case(instruction) {
        return false;
    }

    // `get` succeeded, so this offset is a valid character boundary.
    let rest = &line[instruction.len()..];
    rest.is_empty() || rest.starts_with(char::is_whitespace)
}
169
170fn parse_label_instruction(rest: &str, labels: &mut HashMap<String, String>) {
171    let tokens = tokenize_label_arguments(rest);
172    if tokens.is_empty() {
173        return;
174    }
175
176    if tokens.first().is_some_and(|token| token.contains('=')) {
177        for (i, token) in tokens.into_iter().enumerate() {
178            if i >= MAX_ITERATION_COUNT {
179                warn!("parse_label_instruction: exceeded MAX_ITERATION_COUNT, truncating");
180                break;
181            }
182            let Some((key, value)) = token.split_once('=') else {
183                continue;
184            };
185            let key = key.trim();
186            if key.starts_with(OCI_LABEL_PREFIX) {
187                labels.insert(key.to_string(), truncate_field(value.trim().to_string()));
188            }
189        }
190        return;
191    }
192
193    if let Some((key, values)) = tokens.split_first()
194        && key.starts_with(OCI_LABEL_PREFIX)
195    {
196        labels.insert(
197            key.to_string(),
198            truncate_field(values.join(" ").trim().to_string()),
199        );
200    }
201}
202
203fn tokenize_label_arguments(input: &str) -> Vec<String> {
204    let mut tokens = Vec::new();
205    let mut current = String::new();
206    let mut chars = input.chars().peekable();
207    let mut quote: Option<char> = None;
208    let mut iterations = 0usize;
209
210    while let Some(ch) = chars.next() {
211        iterations += 1;
212        if iterations > MAX_ITERATION_COUNT {
213            warn!("tokenize_label_arguments: exceeded MAX_ITERATION_COUNT, truncating");
214            break;
215        }
216        match quote {
217            Some(current_quote) => {
218                if ch == '\\' {
219                    if let Some(next) = chars.next() {
220                        current.push(next);
221                    }
222                } else if ch == current_quote {
223                    quote = None;
224                } else {
225                    current.push(ch);
226                }
227            }
228            None => match ch {
229                '"' | '\'' => quote = Some(ch),
230                '\\' => {
231                    if let Some(next) = chars.next() {
232                        current.push(next);
233                    }
234                }
235                whitespace if whitespace.is_whitespace() => {
236                    if !current.is_empty() {
237                        tokens.push(std::mem::take(&mut current));
238                    }
239                }
240                _ => current.push(ch),
241            },
242        }
243    }
244
245    if !current.is_empty() {
246        tokens.push(current);
247    }
248
249    tokens
250}
251
// Registration entry for this parser: a human-readable description, the path
// globs it claims, the package type name, the primary language and a
// reference URL for the OCI annotation keys.
// NOTE(review): each file name is listed in two capitalisations here, while
// `DockerfileParser::is_match` compares names ASCII case-insensitively —
// confirm how `register_parser!` uses these globs.
crate::register_parser!(
    "Dockerfile or Containerfile OCI image metadata",
    &[
        "**/Dockerfile",
        "**/dockerfile",
        "**/Containerfile",
        "**/containerfile",
        "**/Containerfile.core",
        "**/containerfile.core",
    ],
    "docker",
    "Dockerfile",
    Some("https://github.com/opencontainers/image-spec/blob/main/annotations.md"),
);