provenant/parsers/
gitmodules.rs1use std::collections::HashMap;
21use std::path::Path;
22
23use log::warn;
24
25use crate::models::{DatasourceId, Dependency, PackageData, PackageType};
26use crate::parsers::utils::read_file_to_string;
27
28use super::PackageParser;
29
/// Package type stamped on every `PackageData` produced in this module and
/// re-exported as `GitmodulesParser::PACKAGE_TYPE`.
const PACKAGE_TYPE: PackageType = PackageType::Github;
31
32fn default_package_data() -> PackageData {
33 PackageData {
34 package_type: Some(PACKAGE_TYPE),
35 datasource_id: Some(DatasourceId::Gitmodules),
36 ..Default::default()
37 }
38}
39
40pub struct GitmodulesParser;
41
42impl PackageParser for GitmodulesParser {
43 const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
44
45 fn is_match(path: &Path) -> bool {
46 path.file_name().is_some_and(|name| name == ".gitmodules")
47 }
48
49 fn extract_packages(path: &Path) -> Vec<PackageData> {
50 let content = match read_file_to_string(path) {
51 Ok(c) => c,
52 Err(e) => {
53 warn!("Failed to read .gitmodules {:?}: {}", path, e);
54 return vec![default_package_data()];
55 }
56 };
57
58 let submodules = parse_gitmodules(&content);
59 if submodules.is_empty() {
60 return vec![default_package_data()];
61 }
62
63 let dependencies: Vec<Dependency> = submodules
64 .into_iter()
65 .map(|sub| Dependency {
66 purl: sub.purl,
67 extracted_requirement: Some(format!("{} at {}", sub.path, sub.url)),
68 scope: Some("runtime".to_string()),
69 is_runtime: Some(true),
70 is_optional: Some(false),
71 is_direct: Some(true),
72 resolved_package: None,
73 extra_data: None,
74 is_pinned: Some(false),
75 })
76 .collect();
77
78 vec![PackageData {
79 package_type: Some(PACKAGE_TYPE),
80 datasource_id: Some(DatasourceId::Gitmodules),
81 dependencies,
82 ..Default::default()
83 }]
84 }
85}
86
/// One `[submodule "<name>"]` section of a `.gitmodules` file.
struct Submodule {
    // The section name is stored when the submodule is built but is not read
    // anywhere in this file, hence the lint allowance.
    #[allow(dead_code)]
    name: String,
    /// Value of the section's `path` key; empty when the key is absent.
    path: String,
    /// Value of the section's `url` key; empty when the key is absent.
    url: String,
    /// Package URL derived from `url`, or `None` when the URL is not a
    /// recognised GitHub/GitLab form.
    purl: Option<String>,
}
94
95fn parse_gitmodules(content: &str) -> Vec<Submodule> {
96 let mut submodules = Vec::new();
97 let mut current_section: Option<HashMap<String, String>> = None;
98 let mut current_name: Option<String> = None;
99
100 for line in content.lines() {
101 let line = line.trim();
102
103 if line.is_empty() || line.starts_with('#') || line.starts_with(';') {
104 continue;
105 }
106
107 if line.starts_with('[') && line.ends_with(']') {
108 if let Some(section) = current_section.take()
109 && let Some(name) = current_name.take()
110 && let Some(submodule) = build_submodule(name, section)
111 {
112 submodules.push(submodule);
113 }
114
115 let section_name = &line[1..line.len() - 1];
116 if let Some(stripped) = section_name.strip_prefix("submodule ") {
117 current_name = Some(stripped.trim_matches('"').to_string());
118 current_section = Some(HashMap::new());
119 }
120 } else if let Some(ref mut section) = current_section
121 && let Some((key, value)) = line.split_once('=')
122 {
123 let key = key.trim().to_string();
124 let value = value.trim().to_string();
125 section.insert(key, value);
126 }
127 }
128
129 if let Some(section) = current_section
130 && let Some(name) = current_name
131 && let Some(submodule) = build_submodule(name, section)
132 {
133 submodules.push(submodule);
134 }
135
136 submodules
137}
138
139fn build_submodule(name: String, section: HashMap<String, String>) -> Option<Submodule> {
140 let path = section.get("path").cloned().unwrap_or_default();
141 let url = section.get("url").cloned().unwrap_or_default();
142
143 if path.is_empty() && url.is_empty() {
144 return None;
145 }
146
147 let purl = build_purl_from_url(&url);
148
149 Some(Submodule {
150 name,
151 path,
152 url,
153 purl,
154 })
155}
156
157fn build_purl_from_url(url: &str) -> Option<String> {
158 if url.is_empty() {
159 return None;
160 }
161
162 if let Some(purl) = parse_github_url(url) {
163 return Some(purl);
164 }
165
166 if let Some(purl) = parse_gitlab_url(url) {
167 return Some(purl);
168 }
169
170 None
171}
172
173fn parse_github_url(url: &str) -> Option<String> {
174 let (namespace, name) = if url.starts_with("https://github.com/") {
175 let path = url.strip_prefix("https://github.com/")?;
176 parse_repo_path(path)?
177 } else if url.starts_with("git@github.com:") {
178 let path = url.strip_prefix("git@github.com:")?;
179 parse_repo_path(path)?
180 } else {
181 return None;
182 };
183
184 Some(format!("pkg:github/{}/{}", namespace, name))
185}
186
187fn parse_gitlab_url(url: &str) -> Option<String> {
188 let (namespace, name) = if url.starts_with("https://gitlab.com/") {
189 let path = url.strip_prefix("https://gitlab.com/")?;
190 parse_repo_path(path)?
191 } else if url.starts_with("git@gitlab.com:") {
192 let path = url.strip_prefix("git@gitlab.com:")?;
193 parse_repo_path(path)?
194 } else {
195 return None;
196 };
197
198 Some(format!("pkg:gitlab/{}/{}", namespace, name))
199}
200
/// Splits a host-relative repository path into `(namespace, name)`.
///
/// The last path segment is the repository name and everything before it,
/// joined with `/`, is the namespace, so nested groups such as
/// `group/sub/project` are kept intact. A trailing `.git` is removed.
///
/// Trailing slashes are ignored and empty segments are dropped, so
/// `user/repo/` and `user/repo.git/` resolve to the same pair as `user/repo`
/// rather than being rejected for having an empty final segment.
///
/// Returns `None` when fewer than two non-empty segments remain.
fn parse_repo_path(path: &str) -> Option<(String, String)> {
    // Trim slashes first, otherwise `repo.git/` would hide the `.git` suffix.
    let path = path.trim_end_matches('/');
    let path = path.strip_suffix(".git").unwrap_or(path);

    let segments: Vec<&str> = path.split('/').filter(|part| !part.is_empty()).collect();
    let (name, namespace) = segments.split_last()?;

    if namespace.is_empty() {
        return None;
    }

    Some((namespace.join("/"), (*name).to_string()))
}
218
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Write;
    use tempfile::NamedTempFile;

    /// Writes `content` to a fresh temporary file. The returned handle must be
    /// kept alive for as long as its path is used.
    fn create_gitmodules_file(content: &str) -> NamedTempFile {
        let mut file = NamedTempFile::new().unwrap();
        file.write_all(content.as_bytes()).unwrap();
        file
    }

    /// Runs the parser over `content` by way of a temporary file.
    fn extract_from(content: &str) -> Vec<PackageData> {
        let file = create_gitmodules_file(content);
        GitmodulesParser::extract_packages(file.path())
    }

    #[test]
    fn test_is_match() {
        for accepted in [".gitmodules", "/path/to/.gitmodules"] {
            assert!(GitmodulesParser::is_match(Path::new(accepted)));
        }
        for rejected in ["gitmodules", ".gitmodules.bak"] {
            assert!(!GitmodulesParser::is_match(Path::new(rejected)));
        }
    }

    #[test]
    fn test_parse_single_submodule() {
        let pkgs = extract_from(
            r#"
[submodule "dep-lib"]
    path = lib/dep
    url = https://github.com/user/dep-lib.git
"#,
        );
        assert_eq!(pkgs.len(), 1);
        assert_eq!(pkgs[0].dependencies.len(), 1);
        let dep = &pkgs[0].dependencies[0];
        assert_eq!(dep.purl, Some("pkg:github/user/dep-lib".to_string()));
    }

    #[test]
    fn test_parse_multiple_submodules() {
        let pkgs = extract_from(
            r#"
[submodule "lib1"]
    path = libs/lib1
    url = https://github.com/org/lib1.git

[submodule "lib2"]
    path = libs/lib2
    url = git@github.com:org/lib2.git
"#,
        );
        assert_eq!(pkgs.len(), 1);
        assert_eq!(pkgs[0].dependencies.len(), 2);
    }

    #[test]
    fn test_parse_git_ssh_url() {
        let pkgs = extract_from(
            r#"
[submodule "private-repo"]
    path = private
    url = git@github.com:company/private-repo.git
"#,
        );
        let dep = &pkgs[0].dependencies[0];
        assert_eq!(
            dep.purl,
            Some("pkg:github/company/private-repo".to_string())
        );
    }

    #[test]
    fn test_parse_gitlab_url() {
        let pkgs = extract_from(
            r#"
[submodule "gitlab-dep"]
    path = gitlab-lib
    url = https://gitlab.com/group/project.git
"#,
        );
        let dep = &pkgs[0].dependencies[0];
        assert_eq!(dep.purl, Some("pkg:gitlab/group/project".to_string()));
    }

    #[test]
    fn test_parse_unknown_url() {
        let pkgs = extract_from(
            r#"
[submodule "custom"]
    path = custom
    url = https://example.com/repo.git
"#,
        );
        let dep = &pkgs[0].dependencies[0];
        assert!(dep.purl.is_none());
        // The raw URL must still be preserved even without a purl.
        let requirement = dep.extracted_requirement.as_ref().unwrap();
        assert!(requirement.contains("https://example.com/repo.git"));
    }

    #[test]
    fn test_parse_empty_file() {
        let pkgs = extract_from("");
        assert_eq!(pkgs.len(), 1);
        assert!(pkgs[0].dependencies.is_empty());
    }

    #[test]
    fn test_parse_with_comments() {
        let pkgs = extract_from(
            r#"
# This is a comment
[submodule "lib"]
    ; another comment
    path = lib
    url = https://github.com/user/lib.git
"#,
        );
        assert_eq!(pkgs[0].dependencies.len(), 1);
    }
}
344
// Registers metadata for this parser through the crate-level
// `register_parser!` macro.
// NOTE(review): the positional arguments appear to be a description, the
// matching path globs, a package-type label, a primary-language string (empty
// here) and a documentation URL — confirm against the macro definition.
crate::register_parser!(
    "Git submodules manifest",
    &["**/.gitmodules"],
    "gitmodules",
    "",
    Some("https://git-scm.com/docs/gitmodules"),
);