provenant/parsers/
docker.rs1use std::collections::HashMap;
5use std::path::Path;
6
7use crate::parser_warn as warn;
8use serde_json::json;
9
10use crate::models::{DatasourceId, PackageData, PackageType};
11use crate::parsers::utils::{MAX_ITERATION_COUNT, read_file_to_string, truncate_field};
12
13use super::PackageParser;
14use super::license_normalization::normalize_spdx_declared_license;
15
16const PACKAGE_TYPE: PackageType = PackageType::Docker;
17const OCI_LABEL_PREFIX: &str = "org.opencontainers.image.";
18
19fn default_package_data() -> PackageData {
20 PackageData {
21 package_type: Some(PACKAGE_TYPE),
22 primary_language: Some("Dockerfile".to_string()),
23 datasource_id: Some(DatasourceId::Dockerfile),
24 ..Default::default()
25 }
26}
27
28pub struct DockerfileParser;
29
30impl PackageParser for DockerfileParser {
31 const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
32
33 fn is_match(path: &Path) -> bool {
34 path.file_name()
35 .and_then(|name| name.to_str())
36 .map(|name| name.to_ascii_lowercase())
37 .is_some_and(|name| {
38 matches!(
39 name.as_str(),
40 "dockerfile" | "containerfile" | "containerfile.core"
41 )
42 })
43 }
44
45 fn extract_packages(path: &Path) -> Vec<PackageData> {
46 let content = match read_file_to_string(path, None) {
47 Ok(content) => content,
48 Err(error) => {
49 warn!("Failed to read Dockerfile {:?}: {}", path, error);
50 return vec![default_package_data()];
51 }
52 };
53
54 vec![parse_dockerfile(&content)]
55 }
56}
57
58pub(crate) fn parse_dockerfile(content: &str) -> PackageData {
59 let oci_labels = extract_oci_labels(content);
60 let extra_data = (!oci_labels.is_empty())
61 .then(|| HashMap::from([("oci_labels".to_string(), json!(oci_labels))]));
62 let extracted_license_statement = oci_labels.get("org.opencontainers.image.licenses").cloned();
63 let (declared_license_expression, declared_license_expression_spdx, license_detections) =
64 normalize_spdx_declared_license(extracted_license_statement.as_deref());
65
66 PackageData {
67 package_type: Some(PACKAGE_TYPE),
68 primary_language: Some("Dockerfile".to_string()),
69 datasource_id: Some(DatasourceId::Dockerfile),
70 name: oci_labels
71 .get("org.opencontainers.image.title")
72 .map(|v| truncate_field(v.clone())),
73 description: oci_labels
74 .get("org.opencontainers.image.description")
75 .map(|v| truncate_field(v.clone())),
76 homepage_url: oci_labels
77 .get("org.opencontainers.image.url")
78 .map(|v| truncate_field(v.clone())),
79 vcs_url: oci_labels
80 .get("org.opencontainers.image.source")
81 .map(|v| truncate_field(v.clone())),
82 version: oci_labels
83 .get("org.opencontainers.image.version")
84 .map(|v| truncate_field(v.clone())),
85 declared_license_expression,
86 declared_license_expression_spdx,
87 license_detections,
88 extracted_license_statement: extracted_license_statement.map(truncate_field),
89 extra_data,
90 ..Default::default()
91 }
92}
93
94fn extract_oci_labels(content: &str) -> HashMap<String, String> {
95 let mut labels = HashMap::new();
96
97 for instruction in logical_lines(content) {
98 let trimmed = instruction.trim_start();
99 if !starts_with_instruction(trimmed, "LABEL") {
100 continue;
101 }
102
103 parse_label_instruction(trimmed[5..].trim_start(), &mut labels);
104 }
105
106 labels
107}
108
109fn logical_lines(content: &str) -> Vec<String> {
110 let mut lines = Vec::new();
111 let mut current = String::new();
112 let mut iterations = 0usize;
113
114 for raw_line in content.lines() {
115 iterations += 1;
116 if iterations > MAX_ITERATION_COUNT {
117 warn!("logical_lines: exceeded MAX_ITERATION_COUNT, truncating");
118 break;
119 }
120 let line = raw_line.trim_end();
121 let trimmed = line.trim();
122
123 if current.is_empty() && (trimmed.is_empty() || trimmed.starts_with('#')) {
124 continue;
125 }
126
127 let has_continuation = ends_with_unescaped_backslash(line);
128 let segment = if has_continuation {
129 let mut without_backslash = line.trim_end().to_string();
130 without_backslash.pop();
131 without_backslash.trim().to_string()
132 } else {
133 trimmed.to_string()
134 };
135
136 if !segment.is_empty() {
137 if !current.is_empty() {
138 current.push(' ');
139 }
140 current.push_str(&segment);
141 }
142
143 if !has_continuation && !current.is_empty() {
144 lines.push(current.trim().to_string());
145 current.clear();
146 }
147 }
148
149 if !current.is_empty() {
150 lines.push(current.trim().to_string());
151 }
152
153 lines
154}
155
/// Reports whether `line` ends with an unescaped backslash.
///
/// The run of backslashes at the very end of the line is measured; an odd
/// length means the last one is not escaped by the one before it.
fn ends_with_unescaped_backslash(line: &str) -> bool {
    let before_run = line.trim_end_matches('\\');
    // A backslash is one byte, so the byte difference is the run length.
    let run_length = line.len() - before_run.len();
    run_length % 2 == 1
}
160
/// Reports whether `line` begins with the keyword `instruction`, compared
/// ASCII case-insensitively, followed by whitespace or the end of the line.
///
/// `line` is expected to have its leading whitespace already removed.
///
/// The prefix is taken with `str::get`, so a line whose byte at
/// `instruction.len()` falls inside a multi-byte UTF-8 character is reported
/// as a non-match instead of panicking on the slice.
fn starts_with_instruction(line: &str, instruction: &str) -> bool {
    let keyword_len = instruction.len();

    match line.get(..keyword_len) {
        Some(prefix) if prefix.eq_ignore_ascii_case(instruction) => {
            // `get` succeeded, so `keyword_len` is a char boundary and the
            // remainder can be sliced safely.
            line[keyword_len..]
                .chars()
                .next()
                .is_none_or(char::is_whitespace)
        }
        _ => false,
    }
}
172
173fn parse_label_instruction(rest: &str, labels: &mut HashMap<String, String>) {
174 let tokens = tokenize_label_arguments(rest);
175 if tokens.is_empty() {
176 return;
177 }
178
179 if tokens.first().is_some_and(|token| token.contains('=')) {
180 for (i, token) in tokens.into_iter().enumerate() {
181 if i >= MAX_ITERATION_COUNT {
182 warn!("parse_label_instruction: exceeded MAX_ITERATION_COUNT, truncating");
183 break;
184 }
185 let Some((key, value)) = token.split_once('=') else {
186 continue;
187 };
188 let key = key.trim();
189 if key.starts_with(OCI_LABEL_PREFIX) {
190 labels.insert(key.to_string(), truncate_field(value.trim().to_string()));
191 }
192 }
193 return;
194 }
195
196 if let Some((key, values)) = tokens.split_first()
197 && key.starts_with(OCI_LABEL_PREFIX)
198 {
199 labels.insert(
200 key.to_string(),
201 truncate_field(values.join(" ").trim().to_string()),
202 );
203 }
204}
205
206fn tokenize_label_arguments(input: &str) -> Vec<String> {
207 let mut tokens = Vec::new();
208 let mut current = String::new();
209 let mut chars = input.chars().peekable();
210 let mut quote: Option<char> = None;
211 let mut iterations = 0usize;
212
213 while let Some(ch) = chars.next() {
214 iterations += 1;
215 if iterations > MAX_ITERATION_COUNT {
216 warn!("tokenize_label_arguments: exceeded MAX_ITERATION_COUNT, truncating");
217 break;
218 }
219 match quote {
220 Some(current_quote) => {
221 if ch == '\\' {
222 if let Some(next) = chars.next() {
223 current.push(next);
224 }
225 } else if ch == current_quote {
226 quote = None;
227 } else {
228 current.push(ch);
229 }
230 }
231 None => match ch {
232 '"' | '\'' => quote = Some(ch),
233 '\\' => {
234 if let Some(next) = chars.next() {
235 current.push(next);
236 }
237 }
238 whitespace if whitespace.is_whitespace() => {
239 if !current.is_empty() {
240 tokens.push(std::mem::take(&mut current));
241 }
242 }
243 _ => current.push(ch),
244 },
245 }
246 }
247
248 if !current.is_empty() {
249 tokens.push(current);
250 }
251
252 tokens
253}
254
// Registers this parser's metadata through the crate-level `register_parser!`
// macro: a human-readable description, the file-name glob patterns, two short
// identifiers, and a link to the OCI image annotations specification.
//
// NOTE(review): the macro definition is not visible from this file — the two
// string arguments after the glob list are presumably the package type and
// primary language; confirm against the macro.
// NOTE(review): `DockerfileParser::is_match` compares file names ASCII
// case-insensitively, while these globs list only two casings per name —
// confirm whether the globs are used for matching or only for documentation.
crate::register_parser!(
    "Dockerfile or Containerfile OCI image metadata",
    &[
        "**/Dockerfile",
        "**/dockerfile",
        "**/Containerfile",
        "**/containerfile",
        "**/Containerfile.core",
        "**/containerfile.core",
    ],
    "docker",
    "Dockerfile",
    Some("https://github.com/opencontainers/image-spec/blob/main/annotations.md"),
);