provenant/parsers/
gitmodules.rs1use std::collections::HashMap;
24use std::path::Path;
25
26use crate::parser_warn as warn;
27
28use crate::models::{DatasourceId, Dependency, PackageData, PackageType};
29use crate::parsers::utils::{MAX_ITERATION_COUNT, read_file_to_string, truncate_field};
30
31use super::PackageParser;
32
33const PACKAGE_TYPE: PackageType = PackageType::Github;
34
35fn default_package_data() -> PackageData {
36 PackageData {
37 package_type: Some(PACKAGE_TYPE),
38 datasource_id: Some(DatasourceId::Gitmodules),
39 ..Default::default()
40 }
41}
42
43pub struct GitmodulesParser;
44
45impl PackageParser for GitmodulesParser {
46 const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
47
48 fn is_match(path: &Path) -> bool {
49 path.file_name().is_some_and(|name| name == ".gitmodules")
50 }
51
52 fn extract_packages(path: &Path) -> Vec<PackageData> {
53 let content = match read_file_to_string(path, None) {
54 Ok(c) => c,
55 Err(e) => {
56 warn!("Failed to read .gitmodules {:?}: {}", path, e);
57 return vec![default_package_data()];
58 }
59 };
60
61 let submodules = parse_gitmodules(&content);
62 if submodules.is_empty() {
63 return vec![default_package_data()];
64 }
65
66 let dependencies: Vec<Dependency> = submodules
67 .into_iter()
68 .take(MAX_ITERATION_COUNT)
69 .map(|sub| Dependency {
70 purl: sub.purl.map(truncate_field),
71 extracted_requirement: Some(truncate_field(format!("{} at {}", sub.path, sub.url))),
72 scope: Some(truncate_field("runtime".to_string())),
73 is_runtime: Some(true),
74 is_optional: Some(false),
75 is_direct: Some(true),
76 resolved_package: None,
77 extra_data: None,
78 is_pinned: Some(false),
79 })
80 .collect();
81
82 vec![PackageData {
83 package_type: Some(PACKAGE_TYPE),
84 datasource_id: Some(DatasourceId::Gitmodules),
85 dependencies,
86 ..Default::default()
87 }]
88 }
89}
90
/// One `[submodule "..."]` entry read from a `.gitmodules` file.
///
/// The derives make parsed entries printable in diagnostics and directly
/// comparable in tests.
#[derive(Debug, Clone, PartialEq, Eq)]
struct Submodule {
    /// Value of the section's `path` key; empty when the key is absent.
    path: String,
    /// Value of the section's `url` key; empty when the key is absent.
    url: String,
    /// Package URL derived from `url`, when the host is recognised.
    purl: Option<String>,
}
96
97fn parse_gitmodules(content: &str) -> Vec<Submodule> {
98 let mut submodules = Vec::new();
99 let mut current_section: Option<HashMap<String, String>> = None;
100 let mut current_name: Option<String> = None;
101
102 for line in content.lines().take(MAX_ITERATION_COUNT) {
103 let line = line.trim();
104
105 if line.is_empty() || line.starts_with('#') || line.starts_with(';') {
106 continue;
107 }
108
109 if line.starts_with('[') && line.ends_with(']') {
110 if let Some(section) = current_section.take()
111 && let Some(name) = current_name.take()
112 && let Some(submodule) = build_submodule(name, section)
113 {
114 submodules.push(submodule);
115 }
116
117 let section_name = &line[1..line.len() - 1];
118 if let Some(stripped) = section_name.strip_prefix("submodule ") {
119 current_name = Some(truncate_field(stripped.trim_matches('"').to_string()));
120 current_section = Some(HashMap::new());
121 }
122 } else if let Some(ref mut section) = current_section
123 && let Some((key, value)) = line.split_once('=')
124 {
125 let key = truncate_field(key.trim().to_string());
126 let value = truncate_field(value.trim().to_string());
127 section.insert(key, value);
128 }
129 }
130
131 if let Some(section) = current_section
132 && let Some(name) = current_name
133 && let Some(submodule) = build_submodule(name, section)
134 {
135 submodules.push(submodule);
136 }
137
138 submodules
139}
140
141fn build_submodule(_name: String, section: HashMap<String, String>) -> Option<Submodule> {
142 let path = truncate_field(section.get("path").cloned().unwrap_or_default());
143 let url = truncate_field(section.get("url").cloned().unwrap_or_default());
144
145 if path.is_empty() && url.is_empty() {
146 return None;
147 }
148
149 let purl = build_purl_from_url(&url);
150
151 Some(Submodule { path, url, purl })
152}
153
154fn build_purl_from_url(url: &str) -> Option<String> {
155 if url.is_empty() {
156 return None;
157 }
158
159 if let Some(purl) = parse_github_url(url) {
160 return Some(purl);
161 }
162
163 if let Some(purl) = parse_gitlab_url(url) {
164 return Some(purl);
165 }
166
167 None
168}
169
170fn parse_github_url(url: &str) -> Option<String> {
171 let (namespace, name) = if url.starts_with("https://github.com/") {
172 let path = url.strip_prefix("https://github.com/")?;
173 parse_repo_path(path)?
174 } else if url.starts_with("git@github.com:") {
175 let path = url.strip_prefix("git@github.com:")?;
176 parse_repo_path(path)?
177 } else {
178 return None;
179 };
180
181 Some(truncate_field(format!("pkg:github/{}/{}", namespace, name)))
182}
183
184fn parse_gitlab_url(url: &str) -> Option<String> {
185 let (namespace, name) = if url.starts_with("https://gitlab.com/") {
186 let path = url.strip_prefix("https://gitlab.com/")?;
187 parse_repo_path(path)?
188 } else if url.starts_with("git@gitlab.com:") {
189 let path = url.strip_prefix("git@gitlab.com:")?;
190 parse_repo_path(path)?
191 } else {
192 return None;
193 };
194
195 Some(truncate_field(format!("pkg:gitlab/{}/{}", namespace, name)))
196}
197
198fn parse_repo_path(path: &str) -> Option<(String, String)> {
199 let path = path.strip_suffix(".git").unwrap_or(path);
200 let parts: Vec<&str> = path.split('/').collect();
201
202 if parts.len() < 2 {
203 return None;
204 }
205
206 let name = truncate_field(parts.last()?.to_string());
207 let namespace = truncate_field(parts[..parts.len() - 1].join("/"));
208
209 if namespace.is_empty() || name.is_empty() {
210 return None;
211 }
212
213 Some((namespace, name))
214}
215
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Write;
    use tempfile::NamedTempFile;

    /// Writes `content` into a new temporary file and returns its handle.
    fn create_gitmodules_file(content: &str) -> NamedTempFile {
        let mut file = NamedTempFile::new().unwrap();
        file.write_all(content.as_bytes()).unwrap();
        file
    }

    /// Runs the parser over `content` via a temporary file.
    fn extract(content: &str) -> Vec<PackageData> {
        let file = create_gitmodules_file(content);
        GitmodulesParser::extract_packages(file.path())
    }

    #[test]
    fn test_is_match() {
        for accepted in [".gitmodules", "/path/to/.gitmodules"] {
            assert!(GitmodulesParser::is_match(Path::new(accepted)));
        }
        for rejected in ["gitmodules", ".gitmodules.bak"] {
            assert!(!GitmodulesParser::is_match(Path::new(rejected)));
        }
    }

    #[test]
    fn test_parse_single_submodule() {
        let pkgs = extract(
            r#"
[submodule "dep-lib"]
    path = lib/dep
    url = https://github.com/user/dep-lib.git
"#,
        );
        assert_eq!(pkgs.len(), 1);
        assert_eq!(pkgs[0].dependencies.len(), 1);
        assert_eq!(
            pkgs[0].dependencies[0].purl.as_deref(),
            Some("pkg:github/user/dep-lib")
        );
    }

    #[test]
    fn test_parse_multiple_submodules() {
        let pkgs = extract(
            r#"
[submodule "lib1"]
    path = libs/lib1
    url = https://github.com/org/lib1.git

[submodule "lib2"]
    path = libs/lib2
    url = git@github.com:org/lib2.git
"#,
        );
        assert_eq!(pkgs.len(), 1);
        assert_eq!(pkgs[0].dependencies.len(), 2);
    }

    #[test]
    fn test_parse_git_ssh_url() {
        let pkgs = extract(
            r#"
[submodule "private-repo"]
    path = private
    url = git@github.com:company/private-repo.git
"#,
        );
        assert_eq!(
            pkgs[0].dependencies[0].purl.as_deref(),
            Some("pkg:github/company/private-repo")
        );
    }

    #[test]
    fn test_parse_gitlab_url() {
        let pkgs = extract(
            r#"
[submodule "gitlab-dep"]
    path = gitlab-lib
    url = https://gitlab.com/group/project.git
"#,
        );
        assert_eq!(
            pkgs[0].dependencies[0].purl.as_deref(),
            Some("pkg:gitlab/group/project")
        );
    }

    #[test]
    fn test_parse_unknown_url() {
        let pkgs = extract(
            r#"
[submodule "custom"]
    path = custom
    url = https://example.com/repo.git
"#,
        );
        let dep = &pkgs[0].dependencies[0];
        assert!(dep.purl.is_none());

        let requirement = dep.extracted_requirement.as_ref().unwrap();
        assert!(requirement.contains("https://example.com/repo.git"));
    }

    #[test]
    fn test_parse_empty_file() {
        let pkgs = extract("");
        assert_eq!(pkgs.len(), 1);
        assert!(pkgs[0].dependencies.is_empty());
    }

    #[test]
    fn test_parse_with_comments() {
        let pkgs = extract(
            r#"
# This is a comment
[submodule "lib"]
    ; another comment
    path = lib
    url = https://github.com/user/lib.git
"#,
        );
        assert_eq!(pkgs[0].dependencies.len(), 1);
    }
}
341
342crate::register_parser!(
343 "Git submodules manifest",
344 &["**/.gitmodules"],
345 "gitmodules",
346 "",
347 Some("https://git-scm.com/docs/gitmodules"),
348);