provenant/parsers/
gitmodules.rs1use std::collections::HashMap;
24use std::path::Path;
25
26use crate::parser_warn as warn;
27
28use crate::models::{DatasourceId, Dependency, PackageData, PackageType};
29use crate::parsers::utils::{MAX_ITERATION_COUNT, read_file_to_string, truncate_field};
30
31use super::PackageParser;
32use super::metadata::ParserMetadata;
33
/// Package type stamped on every `PackageData` built in this file and
/// re-exported as `PackageParser::PACKAGE_TYPE` for `GitmodulesParser`.
const PACKAGE_TYPE: PackageType = PackageType::Github;
35
36fn default_package_data() -> PackageData {
37 PackageData {
38 package_type: Some(PACKAGE_TYPE),
39 datasource_id: Some(DatasourceId::Gitmodules),
40 ..Default::default()
41 }
42}
43
/// Parser for `.gitmodules` files: every submodule declared in the file is
/// reported as a dependency of a single `PackageData`.
pub struct GitmodulesParser;
45
46impl PackageParser for GitmodulesParser {
47 const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
48
49 fn metadata() -> Vec<ParserMetadata> {
50 vec![ParserMetadata {
51 description: "Git submodules manifest",
52 file_patterns: &["**/.gitmodules"],
53 package_type: "gitmodules",
54 primary_language: "",
55 documentation_url: Some("https://git-scm.com/docs/gitmodules"),
56 }]
57 }
58
59 fn is_match(path: &Path) -> bool {
60 path.file_name().is_some_and(|name| name == ".gitmodules")
61 }
62
63 fn extract_packages(path: &Path) -> Vec<PackageData> {
64 let content = match read_file_to_string(path, None) {
65 Ok(c) => c,
66 Err(e) => {
67 warn!("Failed to read .gitmodules {:?}: {}", path, e);
68 return vec![default_package_data()];
69 }
70 };
71
72 let submodules = parse_gitmodules(&content);
73 if submodules.is_empty() {
74 return vec![default_package_data()];
75 }
76
77 let dependencies: Vec<Dependency> = submodules
78 .into_iter()
79 .take(MAX_ITERATION_COUNT)
80 .map(|sub| Dependency {
81 purl: sub.purl.map(truncate_field),
82 extracted_requirement: Some(truncate_field(format!("{} at {}", sub.path, sub.url))),
83 scope: Some(truncate_field("runtime".to_string())),
84 is_runtime: Some(true),
85 is_optional: Some(false),
86 is_direct: Some(true),
87 resolved_package: None,
88 extra_data: None,
89 is_pinned: Some(false),
90 })
91 .collect();
92
93 vec![PackageData {
94 package_type: Some(PACKAGE_TYPE),
95 datasource_id: Some(DatasourceId::Gitmodules),
96 dependencies,
97 ..Default::default()
98 }]
99 }
100}
101
/// One submodule entry collected from a `.gitmodules` section.
#[derive(Debug, Clone, PartialEq, Eq)]
struct Submodule {
    /// Value of the section's `path` key; empty when the key is absent.
    path: String,
    /// Value of the section's `url` key; empty when the key is absent.
    url: String,
    /// Package URL derived from `url`, or `None` when the URL is not a
    /// recognised GitHub/GitLab form.
    purl: Option<String>,
}
107
108fn parse_gitmodules(content: &str) -> Vec<Submodule> {
109 let mut submodules = Vec::new();
110 let mut current_section: Option<HashMap<String, String>> = None;
111 let mut current_name: Option<String> = None;
112
113 for line in content.lines().take(MAX_ITERATION_COUNT) {
114 let line = line.trim();
115
116 if line.is_empty() || line.starts_with('#') || line.starts_with(';') {
117 continue;
118 }
119
120 if line.starts_with('[') && line.ends_with(']') {
121 if let Some(section) = current_section.take()
122 && let Some(name) = current_name.take()
123 && let Some(submodule) = build_submodule(name, section)
124 {
125 submodules.push(submodule);
126 }
127
128 let section_name = &line[1..line.len() - 1];
129 if let Some(stripped) = section_name.strip_prefix("submodule ") {
130 current_name = Some(truncate_field(stripped.trim_matches('"').to_string()));
131 current_section = Some(HashMap::new());
132 }
133 } else if let Some(ref mut section) = current_section
134 && let Some((key, value)) = line.split_once('=')
135 {
136 let key = truncate_field(key.trim().to_string());
137 let value = truncate_field(value.trim().to_string());
138 section.insert(key, value);
139 }
140 }
141
142 if let Some(section) = current_section
143 && let Some(name) = current_name
144 && let Some(submodule) = build_submodule(name, section)
145 {
146 submodules.push(submodule);
147 }
148
149 submodules
150}
151
152fn build_submodule(_name: String, section: HashMap<String, String>) -> Option<Submodule> {
153 let path = truncate_field(section.get("path").cloned().unwrap_or_default());
154 let url = truncate_field(section.get("url").cloned().unwrap_or_default());
155
156 if path.is_empty() && url.is_empty() {
157 return None;
158 }
159
160 let purl = build_purl_from_url(&url);
161
162 Some(Submodule { path, url, purl })
163}
164
165fn build_purl_from_url(url: &str) -> Option<String> {
166 if url.is_empty() {
167 return None;
168 }
169
170 if let Some(purl) = parse_github_url(url) {
171 return Some(purl);
172 }
173
174 if let Some(purl) = parse_gitlab_url(url) {
175 return Some(purl);
176 }
177
178 None
179}
180
181fn parse_github_url(url: &str) -> Option<String> {
182 let (namespace, name) = if url.starts_with("https://github.com/") {
183 let path = url.strip_prefix("https://github.com/")?;
184 parse_repo_path(path)?
185 } else if url.starts_with("git@github.com:") {
186 let path = url.strip_prefix("git@github.com:")?;
187 parse_repo_path(path)?
188 } else {
189 return None;
190 };
191
192 Some(truncate_field(format!("pkg:github/{}/{}", namespace, name)))
193}
194
195fn parse_gitlab_url(url: &str) -> Option<String> {
196 let (namespace, name) = if url.starts_with("https://gitlab.com/") {
197 let path = url.strip_prefix("https://gitlab.com/")?;
198 parse_repo_path(path)?
199 } else if url.starts_with("git@gitlab.com:") {
200 let path = url.strip_prefix("git@gitlab.com:")?;
201 parse_repo_path(path)?
202 } else {
203 return None;
204 };
205
206 Some(truncate_field(format!("pkg:gitlab/{}/{}", namespace, name)))
207}
208
209fn parse_repo_path(path: &str) -> Option<(String, String)> {
210 let path = path.strip_suffix(".git").unwrap_or(path);
211 let parts: Vec<&str> = path.split('/').collect();
212
213 if parts.len() < 2 {
214 return None;
215 }
216
217 let name = truncate_field(parts.last()?.to_string());
218 let namespace = truncate_field(parts[..parts.len() - 1].join("/"));
219
220 if namespace.is_empty() || name.is_empty() {
221 return None;
222 }
223
224 Some((namespace, name))
225}
226
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Write;
    use tempfile::NamedTempFile;

    /// Writes `content` into a new `NamedTempFile` and returns the handle.
    fn create_gitmodules_file(content: &str) -> NamedTempFile {
        let mut file = NamedTempFile::new().unwrap();
        file.write_all(content.as_bytes()).unwrap();
        file
    }

    /// Runs `GitmodulesParser::extract_packages` over `content` via a
    /// temporary file.
    fn packages_for(content: &str) -> Vec<PackageData> {
        let file = create_gitmodules_file(content);
        GitmodulesParser::extract_packages(file.path())
    }

    #[test]
    fn test_is_match() {
        for accepted in [".gitmodules", "/path/to/.gitmodules"] {
            assert!(GitmodulesParser::is_match(Path::new(accepted)));
        }
        for rejected in ["gitmodules", ".gitmodules.bak"] {
            assert!(!GitmodulesParser::is_match(Path::new(rejected)));
        }
    }

    #[test]
    fn test_parse_single_submodule() {
        let pkgs = packages_for(
            r#"
[submodule "dep-lib"]
 path = lib/dep
 url = https://github.com/user/dep-lib.git
"#,
        );
        assert_eq!(pkgs.len(), 1);
        assert_eq!(pkgs[0].dependencies.len(), 1);
        let purl = pkgs[0].dependencies[0].purl.as_deref();
        assert_eq!(purl, Some("pkg:github/user/dep-lib"));
    }

    #[test]
    fn test_parse_multiple_submodules() {
        let pkgs = packages_for(
            r#"
[submodule "lib1"]
 path = libs/lib1
 url = https://github.com/org/lib1.git

[submodule "lib2"]
 path = libs/lib2
 url = git@github.com:org/lib2.git
"#,
        );
        assert_eq!(pkgs.len(), 1);
        assert_eq!(pkgs[0].dependencies.len(), 2);
    }

    #[test]
    fn test_parse_git_ssh_url() {
        let pkgs = packages_for(
            r#"
[submodule "private-repo"]
 path = private
 url = git@github.com:company/private-repo.git
"#,
        );
        let purl = pkgs[0].dependencies[0].purl.as_deref();
        assert_eq!(purl, Some("pkg:github/company/private-repo"));
    }

    #[test]
    fn test_parse_gitlab_url() {
        let pkgs = packages_for(
            r#"
[submodule "gitlab-dep"]
 path = gitlab-lib
 url = https://gitlab.com/group/project.git
"#,
        );
        let purl = pkgs[0].dependencies[0].purl.as_deref();
        assert_eq!(purl, Some("pkg:gitlab/group/project"));
    }

    #[test]
    fn test_parse_unknown_url() {
        let pkgs = packages_for(
            r#"
[submodule "custom"]
 path = custom
 url = https://example.com/repo.git
"#,
        );
        let dep = &pkgs[0].dependencies[0];
        assert!(dep.purl.is_none());
        // The raw URL must still be visible in the requirement text.
        let requirement = dep.extracted_requirement.as_deref().unwrap();
        assert!(requirement.contains("https://example.com/repo.git"));
    }

    #[test]
    fn test_parse_empty_file() {
        let pkgs = packages_for("");
        assert_eq!(pkgs.len(), 1);
        assert!(pkgs[0].dependencies.is_empty());
    }

    #[test]
    fn test_parse_with_comments() {
        let pkgs = packages_for(
            r#"
# This is a comment
[submodule "lib"]
 ; another comment
 path = lib
 url = https://github.com/user/lib.git
"#,
        );
        assert_eq!(pkgs[0].dependencies.len(), 1);
    }
}