provenant/parsers/
docker.rs1use std::collections::HashMap;
5use std::path::Path;
6
7use crate::parser_warn as warn;
8use serde_json::json;
9
10use crate::models::{DatasourceId, PackageData, PackageType};
11use crate::parsers::utils::{MAX_ITERATION_COUNT, read_file_to_string, truncate_field};
12
13use super::PackageParser;
14use super::license_normalization::normalize_spdx_declared_license;
15use super::metadata::ParserMetadata;
16
17const PACKAGE_TYPE: PackageType = PackageType::Docker;
18const OCI_LABEL_PREFIX: &str = "org.opencontainers.image.";
19
20fn default_package_data() -> PackageData {
21 PackageData {
22 package_type: Some(PACKAGE_TYPE),
23 primary_language: Some("Dockerfile".to_string()),
24 datasource_id: Some(DatasourceId::Dockerfile),
25 ..Default::default()
26 }
27}
28
29pub struct DockerfileParser;
30
31impl PackageParser for DockerfileParser {
32 const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
33
34 fn metadata() -> Vec<ParserMetadata> {
35 vec![ParserMetadata {
36 description: "Dockerfile or Containerfile OCI image metadata",
37 file_patterns: &[
38 "**/Dockerfile",
39 "**/dockerfile",
40 "**/Containerfile",
41 "**/containerfile",
42 "**/Containerfile.core",
43 "**/containerfile.core",
44 ],
45 package_type: "docker",
46 primary_language: "Dockerfile",
47 documentation_url: Some(
48 "https://github.com/opencontainers/image-spec/blob/main/annotations.md",
49 ),
50 }]
51 }
52
53 fn is_match(path: &Path) -> bool {
54 path.file_name()
55 .and_then(|name| name.to_str())
56 .map(|name| name.to_ascii_lowercase())
57 .is_some_and(|name| {
58 matches!(
59 name.as_str(),
60 "dockerfile" | "containerfile" | "containerfile.core"
61 )
62 })
63 }
64
65 fn extract_packages(path: &Path) -> Vec<PackageData> {
66 let content = match read_file_to_string(path, None) {
67 Ok(content) => content,
68 Err(error) => {
69 warn!("Failed to read Dockerfile {:?}: {}", path, error);
70 return vec![default_package_data()];
71 }
72 };
73
74 vec![parse_dockerfile(&content)]
75 }
76}
77
78pub(crate) fn parse_dockerfile(content: &str) -> PackageData {
79 let oci_labels = extract_oci_labels(content);
80 let extra_data = (!oci_labels.is_empty())
81 .then(|| HashMap::from([("oci_labels".to_string(), json!(oci_labels))]));
82 let extracted_license_statement = oci_labels.get("org.opencontainers.image.licenses").cloned();
83 let (declared_license_expression, declared_license_expression_spdx, license_detections) =
84 normalize_spdx_declared_license(extracted_license_statement.as_deref());
85
86 PackageData {
87 package_type: Some(PACKAGE_TYPE),
88 primary_language: Some("Dockerfile".to_string()),
89 datasource_id: Some(DatasourceId::Dockerfile),
90 name: oci_labels
91 .get("org.opencontainers.image.title")
92 .map(|v| truncate_field(v.clone())),
93 description: oci_labels
94 .get("org.opencontainers.image.description")
95 .map(|v| truncate_field(v.clone())),
96 homepage_url: oci_labels
97 .get("org.opencontainers.image.url")
98 .map(|v| truncate_field(v.clone())),
99 vcs_url: oci_labels
100 .get("org.opencontainers.image.source")
101 .map(|v| truncate_field(v.clone())),
102 version: oci_labels
103 .get("org.opencontainers.image.version")
104 .map(|v| truncate_field(v.clone())),
105 declared_license_expression,
106 declared_license_expression_spdx,
107 license_detections,
108 extracted_license_statement: extracted_license_statement.map(truncate_field),
109 extra_data,
110 ..Default::default()
111 }
112}
113
114fn extract_oci_labels(content: &str) -> HashMap<String, String> {
115 let mut labels = HashMap::new();
116
117 for instruction in logical_lines(content) {
118 let trimmed = instruction.trim_start();
119 if !starts_with_instruction(trimmed, "LABEL") {
120 continue;
121 }
122
123 parse_label_instruction(trimmed[5..].trim_start(), &mut labels);
124 }
125
126 labels
127}
128
129fn logical_lines(content: &str) -> Vec<String> {
130 let mut lines = Vec::new();
131 let mut current = String::new();
132 let mut iterations = 0usize;
133
134 for raw_line in content.lines() {
135 iterations += 1;
136 if iterations > MAX_ITERATION_COUNT {
137 warn!("logical_lines: exceeded MAX_ITERATION_COUNT, truncating");
138 break;
139 }
140 let line = raw_line.trim_end();
141 let trimmed = line.trim();
142
143 if current.is_empty() && (trimmed.is_empty() || trimmed.starts_with('#')) {
144 continue;
145 }
146
147 let has_continuation = ends_with_unescaped_backslash(line);
148 let segment = if has_continuation {
149 let mut without_backslash = line.trim_end().to_string();
150 without_backslash.pop();
151 without_backslash.trim().to_string()
152 } else {
153 trimmed.to_string()
154 };
155
156 if !segment.is_empty() {
157 if !current.is_empty() {
158 current.push(' ');
159 }
160 current.push_str(&segment);
161 }
162
163 if !has_continuation && !current.is_empty() {
164 lines.push(current.trim().to_string());
165 current.clear();
166 }
167 }
168
169 if !current.is_empty() {
170 lines.push(current.trim().to_string());
171 }
172
173 lines
174}
175
/// Returns `true` when `line` ends with an odd number of backslashes, i.e.
/// the final backslash is not itself escaped by a preceding one.
fn ends_with_unescaped_backslash(line: &str) -> bool {
    // A backslash is one byte, so the length difference is the run length.
    let without_run = line.trim_end_matches('\\');
    let run_length = line.len() - without_run.len();
    run_length % 2 == 1
}
180
/// Returns `true` when `line` begins with the keyword `instruction`
/// (compared ASCII case-insensitively) and the keyword is followed by either
/// whitespace or the end of the line.
///
/// The prefix is taken with `str::get`, so a line that is too short, or
/// whose byte at `instruction.len()` falls inside a multi-byte UTF-8
/// character, yields `false` instead of panicking on a bad slice boundary.
fn starts_with_instruction(line: &str, instruction: &str) -> bool {
    let Some(prefix) = line.get(..instruction.len()) else {
        return false;
    };
    if !prefix.eq_ignore_ascii_case(instruction) {
        return false;
    }

    // `get` succeeded, so `instruction.len()` is a valid character boundary.
    match line[instruction.len()..].chars().next() {
        Some(next) => next.is_whitespace(),
        None => true,
    }
}
192
193fn parse_label_instruction(rest: &str, labels: &mut HashMap<String, String>) {
194 let tokens = tokenize_label_arguments(rest);
195 if tokens.is_empty() {
196 return;
197 }
198
199 if tokens.first().is_some_and(|token| token.contains('=')) {
200 for (i, token) in tokens.into_iter().enumerate() {
201 if i >= MAX_ITERATION_COUNT {
202 warn!("parse_label_instruction: exceeded MAX_ITERATION_COUNT, truncating");
203 break;
204 }
205 let Some((key, value)) = token.split_once('=') else {
206 continue;
207 };
208 let key = key.trim();
209 if key.starts_with(OCI_LABEL_PREFIX) {
210 labels.insert(key.to_string(), truncate_field(value.trim().to_string()));
211 }
212 }
213 return;
214 }
215
216 if let Some((key, values)) = tokens.split_first()
217 && key.starts_with(OCI_LABEL_PREFIX)
218 {
219 labels.insert(
220 key.to_string(),
221 truncate_field(values.join(" ").trim().to_string()),
222 );
223 }
224}
225
226fn tokenize_label_arguments(input: &str) -> Vec<String> {
227 let mut tokens = Vec::new();
228 let mut current = String::new();
229 let mut chars = input.chars().peekable();
230 let mut quote: Option<char> = None;
231 let mut iterations = 0usize;
232
233 while let Some(ch) = chars.next() {
234 iterations += 1;
235 if iterations > MAX_ITERATION_COUNT {
236 warn!("tokenize_label_arguments: exceeded MAX_ITERATION_COUNT, truncating");
237 break;
238 }
239 match quote {
240 Some(current_quote) => {
241 if ch == '\\' {
242 if let Some(next) = chars.next() {
243 current.push(next);
244 }
245 } else if ch == current_quote {
246 quote = None;
247 } else {
248 current.push(ch);
249 }
250 }
251 None => match ch {
252 '"' | '\'' => quote = Some(ch),
253 '\\' => {
254 if let Some(next) = chars.next() {
255 current.push(next);
256 }
257 }
258 whitespace if whitespace.is_whitespace() => {
259 if !current.is_empty() {
260 tokens.push(std::mem::take(&mut current));
261 }
262 }
263 _ => current.push(ch),
264 },
265 }
266 }
267
268 if !current.is_empty() {
269 tokens.push(current);
270 }
271
272 tokens
273}