provenant/parsers/
docker.rs1use std::collections::HashMap;
2use std::path::Path;
3
4use log::warn;
5use serde_json::json;
6
7use crate::models::{DatasourceId, PackageData, PackageType};
8use crate::parsers::utils::read_file_to_string;
9
10use super::PackageParser;
11
12const PACKAGE_TYPE: PackageType = PackageType::Docker;
13const OCI_LABEL_PREFIX: &str = "org.opencontainers.image.";
14
15fn default_package_data() -> PackageData {
16 PackageData {
17 package_type: Some(PACKAGE_TYPE),
18 primary_language: Some("Dockerfile".to_string()),
19 datasource_id: Some(DatasourceId::Dockerfile),
20 ..Default::default()
21 }
22}
23
24pub struct DockerfileParser;
25
26impl PackageParser for DockerfileParser {
27 const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
28
29 fn is_match(path: &Path) -> bool {
30 path.file_name()
31 .and_then(|name| name.to_str())
32 .map(|name| name.to_ascii_lowercase())
33 .is_some_and(|name| {
34 matches!(
35 name.as_str(),
36 "dockerfile" | "containerfile" | "containerfile.core"
37 )
38 })
39 }
40
41 fn extract_packages(path: &Path) -> Vec<PackageData> {
42 let content = match read_file_to_string(path) {
43 Ok(content) => content,
44 Err(error) => {
45 warn!("Failed to read Dockerfile {:?}: {}", path, error);
46 return vec![default_package_data()];
47 }
48 };
49
50 vec![parse_dockerfile(&content)]
51 }
52}
53
54pub(crate) fn parse_dockerfile(content: &str) -> PackageData {
55 let oci_labels = extract_oci_labels(content);
56 let extra_data = (!oci_labels.is_empty())
57 .then(|| HashMap::from([("oci_labels".to_string(), json!(oci_labels))]));
58
59 PackageData {
60 package_type: Some(PACKAGE_TYPE),
61 primary_language: Some("Dockerfile".to_string()),
62 datasource_id: Some(DatasourceId::Dockerfile),
63 name: oci_labels.get("org.opencontainers.image.title").cloned(),
64 description: oci_labels
65 .get("org.opencontainers.image.description")
66 .cloned(),
67 homepage_url: oci_labels.get("org.opencontainers.image.url").cloned(),
68 vcs_url: oci_labels.get("org.opencontainers.image.source").cloned(),
69 version: oci_labels.get("org.opencontainers.image.version").cloned(),
70 extracted_license_statement: oci_labels.get("org.opencontainers.image.licenses").cloned(),
71 extra_data,
72 ..Default::default()
73 }
74}
75
76fn extract_oci_labels(content: &str) -> HashMap<String, String> {
77 let mut labels = HashMap::new();
78
79 for instruction in logical_lines(content) {
80 let trimmed = instruction.trim_start();
81 if !starts_with_instruction(trimmed, "LABEL") {
82 continue;
83 }
84
85 parse_label_instruction(trimmed[5..].trim_start(), &mut labels);
86 }
87
88 labels
89}
90
/// Joins the physical lines of a Dockerfile into logical instructions.
///
/// A line ending in an unescaped backslash continues on the next line; the
/// backslash is dropped and the pieces are joined with a single space. Each
/// returned string is trimmed and non-empty.
///
/// Blank lines and comment lines (first non-whitespace character is `#`) are
/// skipped everywhere, including between the lines of a continued
/// instruction. Docker strips such lines before joining, so a comment in the
/// middle of a multi-line `LABEL` must neither become part of the instruction
/// nor terminate it.
fn logical_lines(content: &str) -> Vec<String> {
    let mut lines = Vec::new();
    let mut current = String::new();

    for raw_line in content.lines() {
        let line = raw_line.trim();

        // Blank and comment lines never contribute text and never end a
        // pending continuation.
        if line.is_empty() || line.starts_with('#') {
            continue;
        }

        let has_continuation = ends_with_unescaped_backslash(line);
        let segment = if has_continuation {
            // Drop the continuation marker and the whitespace before it.
            line.strip_suffix('\\').unwrap_or(line).trim_end()
        } else {
            line
        };

        if !segment.is_empty() {
            if !current.is_empty() {
                current.push(' ');
            }
            current.push_str(segment);
        }

        if !has_continuation && !current.is_empty() {
            lines.push(std::mem::take(&mut current));
        }
    }

    // A trailing continuation with nothing after it still yields what was
    // gathered so far.
    if !current.is_empty() {
        lines.push(current);
    }

    lines
}

/// Returns `true` when `line` ends with an odd number of backslashes, i.e. the
/// final backslash is not itself escaped by a preceding one.
fn ends_with_unescaped_backslash(line: &str) -> bool {
    let trailing = line.bytes().rev().take_while(|&byte| byte == b'\\').count();
    trailing % 2 == 1
}
136
/// Reports whether `line` begins with the keyword `instruction`, compared
/// ASCII case-insensitively, and the keyword is followed by whitespace or the
/// end of the line.
///
/// The prefix is taken with the checked `str::get`, so a line whose byte at
/// `instruction.len()` falls inside a multi-byte character (for example
/// `"RUN é"` tested against `"LABEL"`) returns `false` instead of panicking.
fn starts_with_instruction(line: &str, instruction: &str) -> bool {
    let Some(head) = line.get(..instruction.len()) else {
        // Too short, or the cut would split a multi-byte character.
        return false;
    };

    if !head.eq_ignore_ascii_case(instruction) {
        return false;
    }

    // `get` succeeded, so this index is a valid character boundary.
    let tail = &line[instruction.len()..];
    tail.is_empty() || tail.starts_with(char::is_whitespace)
}
148
149fn parse_label_instruction(rest: &str, labels: &mut HashMap<String, String>) {
150 let tokens = tokenize_label_arguments(rest);
151 if tokens.is_empty() {
152 return;
153 }
154
155 if tokens.first().is_some_and(|token| token.contains('=')) {
156 for token in tokens {
157 let Some((key, value)) = token.split_once('=') else {
158 continue;
159 };
160 let key = key.trim();
161 if key.starts_with(OCI_LABEL_PREFIX) {
162 labels.insert(key.to_string(), value.trim().to_string());
163 }
164 }
165 return;
166 }
167
168 if let Some((key, values)) = tokens.split_first()
169 && key.starts_with(OCI_LABEL_PREFIX)
170 {
171 labels.insert(key.to_string(), values.join(" ").trim().to_string());
172 }
173}
174
/// Splits the argument text of a `LABEL` instruction into shell-like words.
///
/// Rules:
///
/// * Unquoted whitespace separates tokens.
/// * Single or double quotes group text, whitespace included, into the
///   current token; the quote characters themselves are removed.
/// * Outside quotes and inside double quotes, a backslash makes the next
///   character literal and is dropped. A trailing lone backslash is discarded.
/// * Inside single quotes a backslash is an ordinary character, so
///   `'C:\tools'` keeps its backslash, matching shell quoting.
/// * An unterminated quote runs to the end of the input.
///
/// Empty tokens are never emitted, so a standalone `""` produces nothing.
fn tokenize_label_arguments(input: &str) -> Vec<String> {
    let mut tokens = Vec::new();
    let mut current = String::new();
    let mut chars = input.chars();
    let mut quote: Option<char> = None;

    while let Some(ch) = chars.next() {
        match (quote, ch) {
            // Closing quote of the kind that opened the group.
            (Some(open), _) if ch == open => quote = None,
            // Everything else inside single quotes is literal.
            (Some('\''), _) => current.push(ch),
            // Escape: valid outside quotes and inside double quotes.
            (_, '\\') => {
                if let Some(next) = chars.next() {
                    current.push(next);
                }
            }
            (Some(_), _) => current.push(ch),
            (None, '"' | '\'') => quote = Some(ch),
            (None, _) if ch.is_whitespace() => {
                if !current.is_empty() {
                    tokens.push(std::mem::take(&mut current));
                }
            }
            (None, _) => current.push(ch),
        }
    }

    if !current.is_empty() {
        tokens.push(current);
    }

    tokens
}
217
// Registers metadata for this parser through the crate's `register_parser!`
// macro: a human-readable description, the path globs it covers, and a
// reference URL for the OCI annotation keys.
// NOTE(review): the two bare strings look like the package type ("docker") and
// the primary language ("Dockerfile") — confirm against the macro definition.
// NOTE(review): the globs list only two casings per file name, whereas
// `DockerfileParser::is_match` compares names ASCII case-insensitively —
// confirm that this difference is intended.
crate::register_parser!(
    "Dockerfile or Containerfile OCI image metadata",
    &[
        "**/Dockerfile",
        "**/dockerfile",
        "**/Containerfile",
        "**/containerfile",
        "**/Containerfile.core",
        "**/containerfile.core",
    ],
    "docker",
    "Dockerfile",
    Some("https://github.com/opencontainers/image-spec/blob/main/annotations.md"),
);