provenant/parsers/
docker.rs1use std::collections::HashMap;
2use std::path::Path;
3
4use crate::parser_warn as warn;
5use serde_json::json;
6
7use crate::models::{DatasourceId, PackageData, PackageType};
8use crate::parsers::utils::read_file_to_string;
9
10use super::PackageParser;
11use super::license_normalization::normalize_spdx_declared_license;
12
13const PACKAGE_TYPE: PackageType = PackageType::Docker;
14const OCI_LABEL_PREFIX: &str = "org.opencontainers.image.";
15
16fn default_package_data() -> PackageData {
17 PackageData {
18 package_type: Some(PACKAGE_TYPE),
19 primary_language: Some("Dockerfile".to_string()),
20 datasource_id: Some(DatasourceId::Dockerfile),
21 ..Default::default()
22 }
23}
24
25pub struct DockerfileParser;
26
27impl PackageParser for DockerfileParser {
28 const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
29
30 fn is_match(path: &Path) -> bool {
31 path.file_name()
32 .and_then(|name| name.to_str())
33 .map(|name| name.to_ascii_lowercase())
34 .is_some_and(|name| {
35 matches!(
36 name.as_str(),
37 "dockerfile" | "containerfile" | "containerfile.core"
38 )
39 })
40 }
41
42 fn extract_packages(path: &Path) -> Vec<PackageData> {
43 let content = match read_file_to_string(path) {
44 Ok(content) => content,
45 Err(error) => {
46 warn!("Failed to read Dockerfile {:?}: {}", path, error);
47 return vec![default_package_data()];
48 }
49 };
50
51 vec![parse_dockerfile(&content)]
52 }
53}
54
55pub(crate) fn parse_dockerfile(content: &str) -> PackageData {
56 let oci_labels = extract_oci_labels(content);
57 let extra_data = (!oci_labels.is_empty())
58 .then(|| HashMap::from([("oci_labels".to_string(), json!(oci_labels))]));
59 let extracted_license_statement = oci_labels.get("org.opencontainers.image.licenses").cloned();
60 let (declared_license_expression, declared_license_expression_spdx, license_detections) =
61 normalize_spdx_declared_license(extracted_license_statement.as_deref());
62
63 PackageData {
64 package_type: Some(PACKAGE_TYPE),
65 primary_language: Some("Dockerfile".to_string()),
66 datasource_id: Some(DatasourceId::Dockerfile),
67 name: oci_labels.get("org.opencontainers.image.title").cloned(),
68 description: oci_labels
69 .get("org.opencontainers.image.description")
70 .cloned(),
71 homepage_url: oci_labels.get("org.opencontainers.image.url").cloned(),
72 vcs_url: oci_labels.get("org.opencontainers.image.source").cloned(),
73 version: oci_labels.get("org.opencontainers.image.version").cloned(),
74 declared_license_expression,
75 declared_license_expression_spdx,
76 license_detections,
77 extracted_license_statement,
78 extra_data,
79 ..Default::default()
80 }
81}
82
83fn extract_oci_labels(content: &str) -> HashMap<String, String> {
84 let mut labels = HashMap::new();
85
86 for instruction in logical_lines(content) {
87 let trimmed = instruction.trim_start();
88 if !starts_with_instruction(trimmed, "LABEL") {
89 continue;
90 }
91
92 parse_label_instruction(trimmed[5..].trim_start(), &mut labels);
93 }
94
95 labels
96}
97
98fn logical_lines(content: &str) -> Vec<String> {
99 let mut lines = Vec::new();
100 let mut current = String::new();
101
102 for raw_line in content.lines() {
103 let line = raw_line.trim_end();
104 let trimmed = line.trim();
105
106 if current.is_empty() && (trimmed.is_empty() || trimmed.starts_with('#')) {
107 continue;
108 }
109
110 let has_continuation = ends_with_unescaped_backslash(line);
111 let segment = if has_continuation {
112 let mut without_backslash = line.trim_end().to_string();
113 without_backslash.pop();
114 without_backslash.trim().to_string()
115 } else {
116 trimmed.to_string()
117 };
118
119 if !segment.is_empty() {
120 if !current.is_empty() {
121 current.push(' ');
122 }
123 current.push_str(&segment);
124 }
125
126 if !has_continuation && !current.is_empty() {
127 lines.push(current.trim().to_string());
128 current.clear();
129 }
130 }
131
132 if !current.is_empty() {
133 lines.push(current.trim().to_string());
134 }
135
136 lines
137}
138
/// Reports whether `line` ends in a backslash that is not itself escaped.
///
/// The trailing run of backslashes is measured: an odd-length run means the
/// final backslash stands alone (a continuation marker), an even-length run
/// (including zero) means every backslash is paired.
fn ends_with_unescaped_backslash(line: &str) -> bool {
    let body = line.trim_end_matches('\\');
    // A backslash is a single byte, so the byte difference is the run length.
    let run_length = line.len() - body.len();
    run_length % 2 == 1
}
143
/// Reports whether `line` begins with the Dockerfile keyword `instruction`
/// (compared ASCII case-insensitively) as a whole word, i.e. the keyword is
/// followed by whitespace or by the end of the line.
///
/// `line` is expected to have its leading whitespace already removed.
///
/// The prefix is taken with the checked `str::get`, so a line whose
/// multi-byte character straddles the keyword length (for example
/// `ENV é=1` tested against `LABEL`) simply does not match instead of
/// panicking on a non-character-boundary slice.
fn starts_with_instruction(line: &str, instruction: &str) -> bool {
    let keyword_len = instruction.len();

    // `None` when the line is too short or `keyword_len` is not a char
    // boundary; in both cases the line cannot start with the keyword.
    let Some(prefix) = line.get(..keyword_len) else {
        return false;
    };
    if !prefix.eq_ignore_ascii_case(instruction) {
        return false;
    }

    // `keyword_len` is a valid boundary here because `get` succeeded.
    let remainder = &line[keyword_len..];
    remainder.is_empty() || remainder.starts_with(char::is_whitespace)
}
155
156fn parse_label_instruction(rest: &str, labels: &mut HashMap<String, String>) {
157 let tokens = tokenize_label_arguments(rest);
158 if tokens.is_empty() {
159 return;
160 }
161
162 if tokens.first().is_some_and(|token| token.contains('=')) {
163 for token in tokens {
164 let Some((key, value)) = token.split_once('=') else {
165 continue;
166 };
167 let key = key.trim();
168 if key.starts_with(OCI_LABEL_PREFIX) {
169 labels.insert(key.to_string(), value.trim().to_string());
170 }
171 }
172 return;
173 }
174
175 if let Some((key, values)) = tokens.split_first()
176 && key.starts_with(OCI_LABEL_PREFIX)
177 {
178 labels.insert(key.to_string(), values.join(" ").trim().to_string());
179 }
180}
181
/// Splits `LABEL` argument text into shell-like tokens.
///
/// * Unquoted whitespace separates tokens.
/// * Single or double quotes group text (including whitespace) into the
///   current token; the quote characters themselves are removed.
/// * A backslash, inside or outside quotes, is removed and the character
///   after it is taken literally; a backslash at the very end is discarded.
/// * An unterminated quote runs to the end of the input.
///
/// Empty tokens are never produced, so a lone `""` yields nothing.
fn tokenize_label_arguments(input: &str) -> Vec<String> {
    let mut words: Vec<String> = Vec::new();
    let mut pending = String::new();
    let mut open_quote: Option<char> = None;
    let mut stream = input.chars();

    while let Some(c) = stream.next() {
        match (open_quote, c) {
            // Escape: keep the following character verbatim, if there is one.
            (_, '\\') => pending.extend(stream.next()),
            // Closing quote matching the one that opened the group.
            (Some(opener), _) if c == opener => open_quote = None,
            // Anything else inside quotes is literal text.
            (Some(_), _) => pending.push(c),
            // Opening quote.
            (None, '"' | '\'') => open_quote = Some(c),
            // Unquoted whitespace ends the token in progress.
            (None, _) if c.is_whitespace() => {
                if !pending.is_empty() {
                    words.push(std::mem::take(&mut pending));
                }
            }
            (None, _) => pending.push(c),
        }
    }

    if !pending.is_empty() {
        words.push(pending);
    }

    words
}
224
// Registers this parser's descriptive metadata through the crate-level
// `register_parser!` macro. The glob list spells out the capitalized and the
// all-lowercase spelling of each file name that `DockerfileParser::is_match`
// accepts (that check itself ignores ASCII case).
// NOTE(review): the positional arguments look like (description, path
// patterns, package type, primary language, documentation URL) — confirm
// against the macro definition, which is not visible here.
crate::register_parser!(
    "Dockerfile or Containerfile OCI image metadata",
    &[
        "**/Dockerfile",
        "**/dockerfile",
        "**/Containerfile",
        "**/containerfile",
        "**/Containerfile.core",
        "**/containerfile.core",
    ],
    "docker",
    "Dockerfile",
    Some("https://github.com/opencontainers/image-spec/blob/main/annotations.md"),
);