Skip to main content

provenant/parsers/
gitmodules.rs

1//! Parser for Git submodule manifest files (`.gitmodules`).
2//!
3//! Extracts submodule dependencies from `.gitmodules` files, treating
4//! git submodules as package dependencies.
5//!
6//! # Supported Formats
7//! - `.gitmodules` (Git submodule configuration)
8//!
9//! # Key Features
10//! - Parses INI-style `.gitmodules` format
11//! - Extracts submodule name, path, and URL
12//! - Generates purl for GitHub/GitLab URLs when possible
13//! - Reports submodules as dependencies
14//!
15//! # Implementation Notes
16//! - Git submodules are treated as dependencies of the containing repository
17//! - URLs are parsed to extract package name when possible
18//! - Supports both https and git@ URL formats
19
20use std::collections::HashMap;
21use std::path::Path;
22
23use log::warn;
24
25use crate::models::{DatasourceId, Dependency, PackageData, PackageType};
26use crate::parsers::utils::read_file_to_string;
27
28use super::PackageParser;
29
/// Package type attached to the `PackageData` produced for a `.gitmodules`
/// file and exposed as the parser's `PackageParser::PACKAGE_TYPE`.
///
/// The same value is used no matter which host a submodule URL points at.
const PACKAGE_TYPE: PackageType = PackageType::Github;
31
32fn default_package_data() -> PackageData {
33    PackageData {
34        package_type: Some(PACKAGE_TYPE),
35        datasource_id: Some(DatasourceId::Gitmodules),
36        ..Default::default()
37    }
38}
39
40pub struct GitmodulesParser;
41
42impl PackageParser for GitmodulesParser {
43    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
44
45    fn is_match(path: &Path) -> bool {
46        path.file_name().is_some_and(|name| name == ".gitmodules")
47    }
48
49    fn extract_packages(path: &Path) -> Vec<PackageData> {
50        let content = match read_file_to_string(path) {
51            Ok(c) => c,
52            Err(e) => {
53                warn!("Failed to read .gitmodules {:?}: {}", path, e);
54                return vec![default_package_data()];
55            }
56        };
57
58        let submodules = parse_gitmodules(&content);
59        if submodules.is_empty() {
60            return vec![default_package_data()];
61        }
62
63        let dependencies: Vec<Dependency> = submodules
64            .into_iter()
65            .map(|sub| Dependency {
66                purl: sub.purl,
67                extracted_requirement: Some(format!("{} at {}", sub.path, sub.url)),
68                scope: Some("runtime".to_string()),
69                is_runtime: Some(true),
70                is_optional: Some(false),
71                is_direct: Some(true),
72                resolved_package: None,
73                extra_data: None,
74                is_pinned: Some(false),
75            })
76            .collect();
77
78        vec![PackageData {
79            package_type: Some(PACKAGE_TYPE),
80            datasource_id: Some(DatasourceId::Gitmodules),
81            dependencies,
82            ..Default::default()
83        }]
84    }
85}
86
/// One `[submodule "<name>"]` entry read from a `.gitmodules` file.
#[derive(Debug, Clone, PartialEq, Eq)]
struct Submodule {
    /// Value of the entry's `path` key; empty when the key is absent.
    path: String,
    /// Value of the entry's `url` key; empty when the key is absent.
    url: String,
    /// Package URL derived from `url`, when one could be built.
    purl: Option<String>,
}
92
93fn parse_gitmodules(content: &str) -> Vec<Submodule> {
94    let mut submodules = Vec::new();
95    let mut current_section: Option<HashMap<String, String>> = None;
96    let mut current_name: Option<String> = None;
97
98    for line in content.lines() {
99        let line = line.trim();
100
101        if line.is_empty() || line.starts_with('#') || line.starts_with(';') {
102            continue;
103        }
104
105        if line.starts_with('[') && line.ends_with(']') {
106            if let Some(section) = current_section.take()
107                && let Some(name) = current_name.take()
108                && let Some(submodule) = build_submodule(name, section)
109            {
110                submodules.push(submodule);
111            }
112
113            let section_name = &line[1..line.len() - 1];
114            if let Some(stripped) = section_name.strip_prefix("submodule ") {
115                current_name = Some(stripped.trim_matches('"').to_string());
116                current_section = Some(HashMap::new());
117            }
118        } else if let Some(ref mut section) = current_section
119            && let Some((key, value)) = line.split_once('=')
120        {
121            let key = key.trim().to_string();
122            let value = value.trim().to_string();
123            section.insert(key, value);
124        }
125    }
126
127    if let Some(section) = current_section
128        && let Some(name) = current_name
129        && let Some(submodule) = build_submodule(name, section)
130    {
131        submodules.push(submodule);
132    }
133
134    submodules
135}
136
137fn build_submodule(_name: String, section: HashMap<String, String>) -> Option<Submodule> {
138    let path = section.get("path").cloned().unwrap_or_default();
139    let url = section.get("url").cloned().unwrap_or_default();
140
141    if path.is_empty() && url.is_empty() {
142        return None;
143    }
144
145    let purl = build_purl_from_url(&url);
146
147    Some(Submodule { path, url, purl })
148}
149
150fn build_purl_from_url(url: &str) -> Option<String> {
151    if url.is_empty() {
152        return None;
153    }
154
155    if let Some(purl) = parse_github_url(url) {
156        return Some(purl);
157    }
158
159    if let Some(purl) = parse_gitlab_url(url) {
160        return Some(purl);
161    }
162
163    None
164}
165
166fn parse_github_url(url: &str) -> Option<String> {
167    let (namespace, name) = if url.starts_with("https://github.com/") {
168        let path = url.strip_prefix("https://github.com/")?;
169        parse_repo_path(path)?
170    } else if url.starts_with("git@github.com:") {
171        let path = url.strip_prefix("git@github.com:")?;
172        parse_repo_path(path)?
173    } else {
174        return None;
175    };
176
177    Some(format!("pkg:github/{}/{}", namespace, name))
178}
179
180fn parse_gitlab_url(url: &str) -> Option<String> {
181    let (namespace, name) = if url.starts_with("https://gitlab.com/") {
182        let path = url.strip_prefix("https://gitlab.com/")?;
183        parse_repo_path(path)?
184    } else if url.starts_with("git@gitlab.com:") {
185        let path = url.strip_prefix("git@gitlab.com:")?;
186        parse_repo_path(path)?
187    } else {
188        return None;
189    };
190
191    Some(format!("pkg:gitlab/{}/{}", namespace, name))
192}
193
/// Splits the path part of a repository URL into `(namespace, name)`.
///
/// Trailing slashes and a trailing `.git` suffix are ignored. The name is
/// the last path segment and the namespace is everything before it, so
/// nested namespaces such as `group/subgroup` are kept intact.
///
/// Returns `None` when the path has no `/` separator or when either part
/// would be empty.
fn parse_repo_path(path: &str) -> Option<(String, String)> {
    // Drop trailing slashes first so that `owner/repo.git/` still loses its
    // `.git` suffix and `owner/repo/` still yields a repository name.
    let trimmed = path.trim_end_matches('/');
    let trimmed = trimmed.strip_suffix(".git").unwrap_or(trimmed);

    let (namespace, name) = trimmed.rsplit_once('/')?;
    if namespace.is_empty() || name.is_empty() {
        return None;
    }

    Some((namespace.to_string(), name.to_string()))
}
211
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Write;
    use tempfile::NamedTempFile;

    /// Writes `content` to a fresh temporary file and returns its handle.
    fn create_gitmodules_file(content: &str) -> NamedTempFile {
        let mut file = NamedTempFile::new().unwrap();
        file.write_all(content.as_bytes()).unwrap();
        file
    }

    /// Runs the parser over `content` through a temporary file. The file is
    /// kept alive until extraction has finished.
    fn extract(content: &str) -> Vec<PackageData> {
        let file = create_gitmodules_file(content);
        GitmodulesParser::extract_packages(file.path())
    }

    #[test]
    fn test_is_match() {
        for matching in [".gitmodules", "/path/to/.gitmodules"] {
            assert!(GitmodulesParser::is_match(Path::new(matching)));
        }
        for other in ["gitmodules", ".gitmodules.bak"] {
            assert!(!GitmodulesParser::is_match(Path::new(other)));
        }
    }

    #[test]
    fn test_parse_single_submodule() {
        let pkgs = extract(
            r#"
[submodule "dep-lib"]
    path = lib/dep
    url = https://github.com/user/dep-lib.git
"#,
        );
        assert_eq!(pkgs.len(), 1);
        assert_eq!(pkgs[0].dependencies.len(), 1);
        let dep = &pkgs[0].dependencies[0];
        assert_eq!(dep.purl.as_deref(), Some("pkg:github/user/dep-lib"));
    }

    #[test]
    fn test_parse_multiple_submodules() {
        let pkgs = extract(
            r#"
[submodule "lib1"]
    path = libs/lib1
    url = https://github.com/org/lib1.git

[submodule "lib2"]
    path = libs/lib2
    url = git@github.com:org/lib2.git
"#,
        );
        assert_eq!(pkgs.len(), 1);
        assert_eq!(pkgs[0].dependencies.len(), 2);
    }

    #[test]
    fn test_parse_git_ssh_url() {
        let pkgs = extract(
            r#"
[submodule "private-repo"]
    path = private
    url = git@github.com:company/private-repo.git
"#,
        );
        let dep = &pkgs[0].dependencies[0];
        assert_eq!(
            dep.purl.as_deref(),
            Some("pkg:github/company/private-repo")
        );
    }

    #[test]
    fn test_parse_gitlab_url() {
        let pkgs = extract(
            r#"
[submodule "gitlab-dep"]
    path = gitlab-lib
    url = https://gitlab.com/group/project.git
"#,
        );
        let dep = &pkgs[0].dependencies[0];
        assert_eq!(dep.purl.as_deref(), Some("pkg:gitlab/group/project"));
    }

    #[test]
    fn test_parse_unknown_url() {
        let pkgs = extract(
            r#"
[submodule "custom"]
    path = custom
    url = https://example.com/repo.git
"#,
        );
        let dep = &pkgs[0].dependencies[0];
        assert!(dep.purl.is_none());
        // The raw URL must still be visible even though no purl was built.
        let requirement = dep.extracted_requirement.as_deref().unwrap();
        assert!(requirement.contains("https://example.com/repo.git"));
    }

    #[test]
    fn test_parse_empty_file() {
        let pkgs = extract("");
        assert_eq!(pkgs.len(), 1);
        assert!(pkgs[0].dependencies.is_empty());
    }

    #[test]
    fn test_parse_with_comments() {
        let pkgs = extract(
            r#"
# This is a comment
[submodule "lib"]
    ; another comment
    path = lib
    url = https://github.com/user/lib.git
"#,
        );
        assert_eq!(pkgs[0].dependencies.len(), 1);
    }
}
337
// Passes this parser's descriptive metadata to the crate-level
// `register_parser!` macro.
// NOTE(review): the macro is defined elsewhere; confirm the meaning of each
// positional argument (in particular the empty string) against its definition.
crate::register_parser!(
    "Git submodules manifest",
    &["**/.gitmodules"],
    "gitmodules",
    "",
    Some("https://git-scm.com/docs/gitmodules"),
);