Skip to main content

provenant/parsers/
docker.rs

1use std::collections::HashMap;
2use std::path::Path;
3
4use log::warn;
5use serde_json::json;
6
7use crate::models::{DatasourceId, PackageData, PackageType};
8use crate::parsers::utils::read_file_to_string;
9
10use super::PackageParser;
11
/// Package type reported for every package produced by this parser.
const PACKAGE_TYPE: PackageType = PackageType::Docker;
/// Key prefix a `LABEL` must carry to be collected; labels without it are ignored.
const OCI_LABEL_PREFIX: &str = "org.opencontainers.image.";
14
15fn default_package_data() -> PackageData {
16    PackageData {
17        package_type: Some(PACKAGE_TYPE),
18        primary_language: Some("Dockerfile".to_string()),
19        datasource_id: Some(DatasourceId::Dockerfile),
20        ..Default::default()
21    }
22}
23
24pub struct DockerfileParser;
25
26impl PackageParser for DockerfileParser {
27    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
28
29    fn is_match(path: &Path) -> bool {
30        path.file_name()
31            .and_then(|name| name.to_str())
32            .map(|name| name.to_ascii_lowercase())
33            .is_some_and(|name| {
34                matches!(
35                    name.as_str(),
36                    "dockerfile" | "containerfile" | "containerfile.core"
37                )
38            })
39    }
40
41    fn extract_packages(path: &Path) -> Vec<PackageData> {
42        let content = match read_file_to_string(path) {
43            Ok(content) => content,
44            Err(error) => {
45                warn!("Failed to read Dockerfile {:?}: {}", path, error);
46                return vec![default_package_data()];
47            }
48        };
49
50        vec![parse_dockerfile(&content)]
51    }
52}
53
54pub(crate) fn parse_dockerfile(content: &str) -> PackageData {
55    let oci_labels = extract_oci_labels(content);
56    let extra_data = (!oci_labels.is_empty())
57        .then(|| HashMap::from([("oci_labels".to_string(), json!(oci_labels))]));
58
59    PackageData {
60        package_type: Some(PACKAGE_TYPE),
61        primary_language: Some("Dockerfile".to_string()),
62        datasource_id: Some(DatasourceId::Dockerfile),
63        name: oci_labels.get("org.opencontainers.image.title").cloned(),
64        description: oci_labels
65            .get("org.opencontainers.image.description")
66            .cloned(),
67        homepage_url: oci_labels.get("org.opencontainers.image.url").cloned(),
68        vcs_url: oci_labels.get("org.opencontainers.image.source").cloned(),
69        version: oci_labels.get("org.opencontainers.image.version").cloned(),
70        extracted_license_statement: oci_labels.get("org.opencontainers.image.licenses").cloned(),
71        extra_data,
72        ..Default::default()
73    }
74}
75
76fn extract_oci_labels(content: &str) -> HashMap<String, String> {
77    let mut labels = HashMap::new();
78
79    for instruction in logical_lines(content) {
80        let trimmed = instruction.trim_start();
81        if !starts_with_instruction(trimmed, "LABEL") {
82            continue;
83        }
84
85        parse_label_instruction(trimmed[5..].trim_start(), &mut labels);
86    }
87
88    labels
89}
90
/// Splits Dockerfile text into logical instructions, joining backslash
/// continuations into a single line.
///
/// * Lines whose first non-whitespace character is `#` are comments and are
///   dropped everywhere, including in the middle of a continued instruction.
///   Docker removes comment lines before joining continuations, so a comment
///   between two continuation lines must not end the instruction.
/// * Blank lines between instructions are skipped. A blank line inside a
///   continuation ends the instruction being built.
/// * Continuation segments are trimmed and joined with a single space.
/// * A trailing continuation on the last line still yields the instruction
///   collected so far.
fn logical_lines(content: &str) -> Vec<String> {
    let mut lines = Vec::new();
    let mut current = String::new();

    for raw_line in content.lines() {
        let line = raw_line.trim();

        // Comments never contribute to an instruction and never end one.
        if line.starts_with('#') {
            continue;
        }
        if line.is_empty() && current.is_empty() {
            continue;
        }

        let has_continuation = ends_with_unescaped_backslash(line);
        let segment = if has_continuation {
            // Drop only the continuation marker; escaped backslashes before it stay.
            line.strip_suffix('\\').unwrap_or(line).trim_end()
        } else {
            line
        };

        if !segment.is_empty() {
            if !current.is_empty() {
                current.push(' ');
            }
            current.push_str(segment);
        }

        if !has_continuation && !current.is_empty() {
            // `current` is built from trimmed, non-empty segments, so it needs no further trim.
            lines.push(std::mem::take(&mut current));
        }
    }

    if !current.is_empty() {
        lines.push(current);
    }

    lines
}

/// Returns `true` when `line` ends with an odd number of backslashes, i.e. the
/// final backslash is not itself escaped and therefore marks a continuation.
fn ends_with_unescaped_backslash(line: &str) -> bool {
    let trailing = line.chars().rev().take_while(|ch| *ch == '\\').count();
    trailing % 2 == 1
}
136
/// Reports whether `line` begins with the Dockerfile keyword `instruction`
/// (compared ignoring ASCII case) followed by whitespace or the end of the line.
///
/// The split is done with `split_at_checked`, so a line that is too short, or
/// whose byte at `instruction.len()` falls inside a multi-byte character
/// (e.g. `RUN écho` checked against `LABEL`), returns `false` instead of
/// panicking on a non-boundary slice.
fn starts_with_instruction(line: &str, instruction: &str) -> bool {
    let Some((head, rest)) = line.split_at_checked(instruction.len()) else {
        return false;
    };

    head.eq_ignore_ascii_case(instruction)
        && rest.chars().next().is_none_or(char::is_whitespace)
}
148
149fn parse_label_instruction(rest: &str, labels: &mut HashMap<String, String>) {
150    let tokens = tokenize_label_arguments(rest);
151    if tokens.is_empty() {
152        return;
153    }
154
155    if tokens.first().is_some_and(|token| token.contains('=')) {
156        for token in tokens {
157            let Some((key, value)) = token.split_once('=') else {
158                continue;
159            };
160            let key = key.trim();
161            if key.starts_with(OCI_LABEL_PREFIX) {
162                labels.insert(key.to_string(), value.trim().to_string());
163            }
164        }
165        return;
166    }
167
168    if let Some((key, values)) = tokens.split_first()
169        && key.starts_with(OCI_LABEL_PREFIX)
170    {
171        labels.insert(key.to_string(), values.join(" ").trim().to_string());
172    }
173}
174
/// Splits `LABEL` argument text into whitespace-separated tokens.
///
/// * Single or double quotes group text (including whitespace) into the
///   current token; the quote characters themselves are not kept.
/// * A backslash, inside or outside quotes, makes the following character
///   literal. A backslash at the very end of the input is dropped.
/// * An unterminated quote runs to the end of the input.
/// * Empty tokens are never emitted.
fn tokenize_label_arguments(input: &str) -> Vec<String> {
    let mut out = Vec::new();
    let mut buf = String::new();
    let mut open_quote: Option<char> = None;
    let mut rest = input.chars();

    while let Some(c) = rest.next() {
        // Escapes behave the same in both states, so handle them up front.
        if c == '\\' {
            if let Some(escaped) = rest.next() {
                buf.push(escaped);
            }
            continue;
        }

        match open_quote {
            Some(q) if c == q => open_quote = None,
            Some(_) => buf.push(c),
            None if c == '"' || c == '\'' => open_quote = Some(c),
            None if c.is_whitespace() => {
                if !buf.is_empty() {
                    out.push(std::mem::take(&mut buf));
                }
            }
            None => buf.push(c),
        }
    }

    if !buf.is_empty() {
        out.push(buf);
    }

    out
}
217
// Registers metadata for this parser via the crate-level `register_parser!` macro.
// NOTE(review): the macro definition is not visible here; the arguments look like
// (description, file-name glob patterns, package type, primary language,
// documentation URL) — confirm against the macro.
// The glob list spells out specific upper/lower-case variants, whereas
// `DockerfileParser::is_match` lowercases the file name before comparing.
crate::register_parser!(
    "Dockerfile or Containerfile OCI image metadata",
    &[
        "**/Dockerfile",
        "**/dockerfile",
        "**/Containerfile",
        "**/containerfile",
        "**/Containerfile.core",
        "**/containerfile.core",
    ],
    "docker",
    "Dockerfile",
    Some("https://github.com/opencontainers/image-spec/blob/main/annotations.md"),
);