provenant/parsers/
docker.rs1use std::collections::HashMap;
2use std::path::Path;
3
4use crate::parser_warn as warn;
5use serde_json::json;
6
7use crate::models::{DatasourceId, PackageData, PackageType};
8use crate::parsers::utils::{MAX_ITERATION_COUNT, read_file_to_string, truncate_field};
9
10use super::PackageParser;
11use super::license_normalization::normalize_spdx_declared_license;
12
13const PACKAGE_TYPE: PackageType = PackageType::Docker;
14const OCI_LABEL_PREFIX: &str = "org.opencontainers.image.";
15
16fn default_package_data() -> PackageData {
17 PackageData {
18 package_type: Some(PACKAGE_TYPE),
19 primary_language: Some("Dockerfile".to_string()),
20 datasource_id: Some(DatasourceId::Dockerfile),
21 ..Default::default()
22 }
23}
24
25pub struct DockerfileParser;
26
27impl PackageParser for DockerfileParser {
28 const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
29
30 fn is_match(path: &Path) -> bool {
31 path.file_name()
32 .and_then(|name| name.to_str())
33 .map(|name| name.to_ascii_lowercase())
34 .is_some_and(|name| {
35 matches!(
36 name.as_str(),
37 "dockerfile" | "containerfile" | "containerfile.core"
38 )
39 })
40 }
41
42 fn extract_packages(path: &Path) -> Vec<PackageData> {
43 let content = match read_file_to_string(path, None) {
44 Ok(content) => content,
45 Err(error) => {
46 warn!("Failed to read Dockerfile {:?}: {}", path, error);
47 return vec![default_package_data()];
48 }
49 };
50
51 vec![parse_dockerfile(&content)]
52 }
53}
54
55pub(crate) fn parse_dockerfile(content: &str) -> PackageData {
56 let oci_labels = extract_oci_labels(content);
57 let extra_data = (!oci_labels.is_empty())
58 .then(|| HashMap::from([("oci_labels".to_string(), json!(oci_labels))]));
59 let extracted_license_statement = oci_labels.get("org.opencontainers.image.licenses").cloned();
60 let (declared_license_expression, declared_license_expression_spdx, license_detections) =
61 normalize_spdx_declared_license(extracted_license_statement.as_deref());
62
63 PackageData {
64 package_type: Some(PACKAGE_TYPE),
65 primary_language: Some("Dockerfile".to_string()),
66 datasource_id: Some(DatasourceId::Dockerfile),
67 name: oci_labels
68 .get("org.opencontainers.image.title")
69 .map(|v| truncate_field(v.clone())),
70 description: oci_labels
71 .get("org.opencontainers.image.description")
72 .map(|v| truncate_field(v.clone())),
73 homepage_url: oci_labels
74 .get("org.opencontainers.image.url")
75 .map(|v| truncate_field(v.clone())),
76 vcs_url: oci_labels
77 .get("org.opencontainers.image.source")
78 .map(|v| truncate_field(v.clone())),
79 version: oci_labels
80 .get("org.opencontainers.image.version")
81 .map(|v| truncate_field(v.clone())),
82 declared_license_expression,
83 declared_license_expression_spdx,
84 license_detections,
85 extracted_license_statement: extracted_license_statement.map(truncate_field),
86 extra_data,
87 ..Default::default()
88 }
89}
90
91fn extract_oci_labels(content: &str) -> HashMap<String, String> {
92 let mut labels = HashMap::new();
93
94 for instruction in logical_lines(content) {
95 let trimmed = instruction.trim_start();
96 if !starts_with_instruction(trimmed, "LABEL") {
97 continue;
98 }
99
100 parse_label_instruction(trimmed[5..].trim_start(), &mut labels);
101 }
102
103 labels
104}
105
106fn logical_lines(content: &str) -> Vec<String> {
107 let mut lines = Vec::new();
108 let mut current = String::new();
109 let mut iterations = 0usize;
110
111 for raw_line in content.lines() {
112 iterations += 1;
113 if iterations > MAX_ITERATION_COUNT {
114 warn!("logical_lines: exceeded MAX_ITERATION_COUNT, truncating");
115 break;
116 }
117 let line = raw_line.trim_end();
118 let trimmed = line.trim();
119
120 if current.is_empty() && (trimmed.is_empty() || trimmed.starts_with('#')) {
121 continue;
122 }
123
124 let has_continuation = ends_with_unescaped_backslash(line);
125 let segment = if has_continuation {
126 let mut without_backslash = line.trim_end().to_string();
127 without_backslash.pop();
128 without_backslash.trim().to_string()
129 } else {
130 trimmed.to_string()
131 };
132
133 if !segment.is_empty() {
134 if !current.is_empty() {
135 current.push(' ');
136 }
137 current.push_str(&segment);
138 }
139
140 if !has_continuation && !current.is_empty() {
141 lines.push(current.trim().to_string());
142 current.clear();
143 }
144 }
145
146 if !current.is_empty() {
147 lines.push(current.trim().to_string());
148 }
149
150 lines
151}
152
/// Returns `true` when `line` ends with an odd number of consecutive
/// backslashes, i.e. the final backslash is not itself escaped by a
/// preceding one.
fn ends_with_unescaped_backslash(line: &str) -> bool {
    let without_trailing = line.trim_end_matches('\\');
    // A backslash is one byte, so the length difference is the run length.
    let backslash_run = line.len() - without_trailing.len();
    backslash_run % 2 != 0
}
157
/// Returns `true` when `line` begins with the keyword `instruction`
/// (compared ASCII case-insensitively) and the keyword is followed either by
/// whitespace or by the end of the line.
///
/// The prefix is taken with the checked `str::get`, so a line whose byte at
/// `instruction.len()` falls inside a multi-byte character (for example
/// `RUN écho` tested against `LABEL`) yields `false` instead of panicking.
fn starts_with_instruction(line: &str, instruction: &str) -> bool {
    match line.get(..instruction.len()) {
        Some(head) if head.eq_ignore_ascii_case(instruction) => {
            // `get` succeeded, so this offset is a valid character boundary.
            let rest = &line[instruction.len()..];
            rest.is_empty() || rest.starts_with(char::is_whitespace)
        }
        _ => false,
    }
}
169
170fn parse_label_instruction(rest: &str, labels: &mut HashMap<String, String>) {
171 let tokens = tokenize_label_arguments(rest);
172 if tokens.is_empty() {
173 return;
174 }
175
176 if tokens.first().is_some_and(|token| token.contains('=')) {
177 for (i, token) in tokens.into_iter().enumerate() {
178 if i >= MAX_ITERATION_COUNT {
179 warn!("parse_label_instruction: exceeded MAX_ITERATION_COUNT, truncating");
180 break;
181 }
182 let Some((key, value)) = token.split_once('=') else {
183 continue;
184 };
185 let key = key.trim();
186 if key.starts_with(OCI_LABEL_PREFIX) {
187 labels.insert(key.to_string(), truncate_field(value.trim().to_string()));
188 }
189 }
190 return;
191 }
192
193 if let Some((key, values)) = tokens.split_first()
194 && key.starts_with(OCI_LABEL_PREFIX)
195 {
196 labels.insert(
197 key.to_string(),
198 truncate_field(values.join(" ").trim().to_string()),
199 );
200 }
201}
202
203fn tokenize_label_arguments(input: &str) -> Vec<String> {
204 let mut tokens = Vec::new();
205 let mut current = String::new();
206 let mut chars = input.chars().peekable();
207 let mut quote: Option<char> = None;
208 let mut iterations = 0usize;
209
210 while let Some(ch) = chars.next() {
211 iterations += 1;
212 if iterations > MAX_ITERATION_COUNT {
213 warn!("tokenize_label_arguments: exceeded MAX_ITERATION_COUNT, truncating");
214 break;
215 }
216 match quote {
217 Some(current_quote) => {
218 if ch == '\\' {
219 if let Some(next) = chars.next() {
220 current.push(next);
221 }
222 } else if ch == current_quote {
223 quote = None;
224 } else {
225 current.push(ch);
226 }
227 }
228 None => match ch {
229 '"' | '\'' => quote = Some(ch),
230 '\\' => {
231 if let Some(next) = chars.next() {
232 current.push(next);
233 }
234 }
235 whitespace if whitespace.is_whitespace() => {
236 if !current.is_empty() {
237 tokens.push(std::mem::take(&mut current));
238 }
239 }
240 _ => current.push(ch),
241 },
242 }
243 }
244
245 if !current.is_empty() {
246 tokens.push(current);
247 }
248
249 tokens
250}
251
252crate::register_parser!(
253 "Dockerfile or Containerfile OCI image metadata",
254 &[
255 "**/Dockerfile",
256 "**/dockerfile",
257 "**/Containerfile",
258 "**/containerfile",
259 "**/Containerfile.core",
260 "**/containerfile.core",
261 ],
262 "docker",
263 "Dockerfile",
264 Some("https://github.com/opencontainers/image-spec/blob/main/annotations.md"),
265);