provenant/parsers/
gitmodules.rs1use std::collections::HashMap;
21use std::path::Path;
22
23use crate::parser_warn as warn;
24
25use crate::models::{DatasourceId, Dependency, PackageData, PackageType};
26use crate::parsers::utils::{MAX_ITERATION_COUNT, read_file_to_string, truncate_field};
27
28use super::PackageParser;
29
30const PACKAGE_TYPE: PackageType = PackageType::Github;
31
32fn default_package_data() -> PackageData {
33 PackageData {
34 package_type: Some(PACKAGE_TYPE),
35 datasource_id: Some(DatasourceId::Gitmodules),
36 ..Default::default()
37 }
38}
39
40pub struct GitmodulesParser;
41
42impl PackageParser for GitmodulesParser {
43 const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
44
45 fn is_match(path: &Path) -> bool {
46 path.file_name().is_some_and(|name| name == ".gitmodules")
47 }
48
49 fn extract_packages(path: &Path) -> Vec<PackageData> {
50 let content = match read_file_to_string(path, None) {
51 Ok(c) => c,
52 Err(e) => {
53 warn!("Failed to read .gitmodules {:?}: {}", path, e);
54 return vec![default_package_data()];
55 }
56 };
57
58 let submodules = parse_gitmodules(&content);
59 if submodules.is_empty() {
60 return vec![default_package_data()];
61 }
62
63 let dependencies: Vec<Dependency> = submodules
64 .into_iter()
65 .take(MAX_ITERATION_COUNT)
66 .map(|sub| Dependency {
67 purl: sub.purl.map(truncate_field),
68 extracted_requirement: Some(truncate_field(format!("{} at {}", sub.path, sub.url))),
69 scope: Some(truncate_field("runtime".to_string())),
70 is_runtime: Some(true),
71 is_optional: Some(false),
72 is_direct: Some(true),
73 resolved_package: None,
74 extra_data: None,
75 is_pinned: Some(false),
76 })
77 .collect();
78
79 vec![PackageData {
80 package_type: Some(PACKAGE_TYPE),
81 datasource_id: Some(DatasourceId::Gitmodules),
82 dependencies,
83 ..Default::default()
84 }]
85 }
86}
87
/// One `[submodule "..."]` entry read from a `.gitmodules` file.
#[derive(Debug, Clone, PartialEq, Eq)]
struct Submodule {
    /// Value of the entry's `path` key; empty when the key is absent.
    path: String,
    /// Value of the entry's `url` key; empty when the key is absent.
    url: String,
    /// Package URL derived from `url`, when the host is recognised.
    purl: Option<String>,
}
93
94fn parse_gitmodules(content: &str) -> Vec<Submodule> {
95 let mut submodules = Vec::new();
96 let mut current_section: Option<HashMap<String, String>> = None;
97 let mut current_name: Option<String> = None;
98
99 for line in content.lines().take(MAX_ITERATION_COUNT) {
100 let line = line.trim();
101
102 if line.is_empty() || line.starts_with('#') || line.starts_with(';') {
103 continue;
104 }
105
106 if line.starts_with('[') && line.ends_with(']') {
107 if let Some(section) = current_section.take()
108 && let Some(name) = current_name.take()
109 && let Some(submodule) = build_submodule(name, section)
110 {
111 submodules.push(submodule);
112 }
113
114 let section_name = &line[1..line.len() - 1];
115 if let Some(stripped) = section_name.strip_prefix("submodule ") {
116 current_name = Some(truncate_field(stripped.trim_matches('"').to_string()));
117 current_section = Some(HashMap::new());
118 }
119 } else if let Some(ref mut section) = current_section
120 && let Some((key, value)) = line.split_once('=')
121 {
122 let key = truncate_field(key.trim().to_string());
123 let value = truncate_field(value.trim().to_string());
124 section.insert(key, value);
125 }
126 }
127
128 if let Some(section) = current_section
129 && let Some(name) = current_name
130 && let Some(submodule) = build_submodule(name, section)
131 {
132 submodules.push(submodule);
133 }
134
135 submodules
136}
137
138fn build_submodule(_name: String, section: HashMap<String, String>) -> Option<Submodule> {
139 let path = truncate_field(section.get("path").cloned().unwrap_or_default());
140 let url = truncate_field(section.get("url").cloned().unwrap_or_default());
141
142 if path.is_empty() && url.is_empty() {
143 return None;
144 }
145
146 let purl = build_purl_from_url(&url);
147
148 Some(Submodule { path, url, purl })
149}
150
151fn build_purl_from_url(url: &str) -> Option<String> {
152 if url.is_empty() {
153 return None;
154 }
155
156 if let Some(purl) = parse_github_url(url) {
157 return Some(purl);
158 }
159
160 if let Some(purl) = parse_gitlab_url(url) {
161 return Some(purl);
162 }
163
164 None
165}
166
167fn parse_github_url(url: &str) -> Option<String> {
168 let (namespace, name) = if url.starts_with("https://github.com/") {
169 let path = url.strip_prefix("https://github.com/")?;
170 parse_repo_path(path)?
171 } else if url.starts_with("git@github.com:") {
172 let path = url.strip_prefix("git@github.com:")?;
173 parse_repo_path(path)?
174 } else {
175 return None;
176 };
177
178 Some(truncate_field(format!("pkg:github/{}/{}", namespace, name)))
179}
180
181fn parse_gitlab_url(url: &str) -> Option<String> {
182 let (namespace, name) = if url.starts_with("https://gitlab.com/") {
183 let path = url.strip_prefix("https://gitlab.com/")?;
184 parse_repo_path(path)?
185 } else if url.starts_with("git@gitlab.com:") {
186 let path = url.strip_prefix("git@gitlab.com:")?;
187 parse_repo_path(path)?
188 } else {
189 return None;
190 };
191
192 Some(truncate_field(format!("pkg:gitlab/{}/{}", namespace, name)))
193}
194
195fn parse_repo_path(path: &str) -> Option<(String, String)> {
196 let path = path.strip_suffix(".git").unwrap_or(path);
197 let parts: Vec<&str> = path.split('/').collect();
198
199 if parts.len() < 2 {
200 return None;
201 }
202
203 let name = truncate_field(parts.last()?.to_string());
204 let namespace = truncate_field(parts[..parts.len() - 1].join("/"));
205
206 if namespace.is_empty() || name.is_empty() {
207 return None;
208 }
209
210 Some((namespace, name))
211}
212
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Write;
    use tempfile::NamedTempFile;

    /// Writes `content` to a fresh temporary file and returns its handle.
    fn create_gitmodules_file(content: &str) -> NamedTempFile {
        let mut file = NamedTempFile::new().unwrap();
        file.write_all(content.as_bytes()).unwrap();
        file
    }

    /// Runs the parser over `content` by way of a temporary file.
    fn extract_from(content: &str) -> Vec<PackageData> {
        let file = create_gitmodules_file(content);
        GitmodulesParser::extract_packages(file.path())
    }

    #[test]
    fn test_is_match() {
        for accepted in [".gitmodules", "/path/to/.gitmodules"] {
            assert!(GitmodulesParser::is_match(Path::new(accepted)));
        }
        for rejected in ["gitmodules", ".gitmodules.bak"] {
            assert!(!GitmodulesParser::is_match(Path::new(rejected)));
        }
    }

    #[test]
    fn test_parse_single_submodule() {
        let packages = extract_from(
            r#"
[submodule "dep-lib"]
    path = lib/dep
    url = https://github.com/user/dep-lib.git
"#,
        );
        assert_eq!(packages.len(), 1);
        assert_eq!(packages[0].dependencies.len(), 1);
        let dependency = &packages[0].dependencies[0];
        assert_eq!(dependency.purl.as_deref(), Some("pkg:github/user/dep-lib"));
    }

    #[test]
    fn test_parse_multiple_submodules() {
        let packages = extract_from(
            r#"
[submodule "lib1"]
    path = libs/lib1
    url = https://github.com/org/lib1.git

[submodule "lib2"]
    path = libs/lib2
    url = git@github.com:org/lib2.git
"#,
        );
        assert_eq!(packages.len(), 1);
        assert_eq!(packages[0].dependencies.len(), 2);
    }

    #[test]
    fn test_parse_git_ssh_url() {
        let packages = extract_from(
            r#"
[submodule "private-repo"]
    path = private
    url = git@github.com:company/private-repo.git
"#,
        );
        let dependency = &packages[0].dependencies[0];
        assert_eq!(
            dependency.purl.as_deref(),
            Some("pkg:github/company/private-repo")
        );
    }

    #[test]
    fn test_parse_gitlab_url() {
        let packages = extract_from(
            r#"
[submodule "gitlab-dep"]
    path = gitlab-lib
    url = https://gitlab.com/group/project.git
"#,
        );
        let dependency = &packages[0].dependencies[0];
        assert_eq!(dependency.purl.as_deref(), Some("pkg:gitlab/group/project"));
    }

    #[test]
    fn test_parse_unknown_url() {
        let packages = extract_from(
            r#"
[submodule "custom"]
    path = custom
    url = https://example.com/repo.git
"#,
        );
        let dependency = &packages[0].dependencies[0];
        assert!(dependency.purl.is_none());

        // Unrecognised hosts still surface the raw URL in the requirement text.
        let requirement = dependency.extracted_requirement.as_deref().unwrap();
        assert!(requirement.contains("https://example.com/repo.git"));
    }

    #[test]
    fn test_parse_empty_file() {
        let packages = extract_from("");
        assert_eq!(packages.len(), 1);
        assert!(packages[0].dependencies.is_empty());
    }

    #[test]
    fn test_parse_with_comments() {
        let packages = extract_from(
            r#"
# This is a comment
[submodule "lib"]
    ; another comment
    path = lib
    url = https://github.com/user/lib.git
"#,
        );
        assert_eq!(packages[0].dependencies.len(), 1);
    }
}
338
339crate::register_parser!(
340 "Git submodules manifest",
341 &["**/.gitmodules"],
342 "gitmodules",
343 "",
344 Some("https://git-scm.com/docs/gitmodules"),
345);