Skip to main content

provenant/parsers/
gitmodules.rs

1//! Parser for Git submodule manifest files (`.gitmodules`).
2//!
3//! Extracts submodule dependencies from `.gitmodules` files, treating
4//! git submodules as package dependencies.
5//!
6//! # Supported Formats
7//! - `.gitmodules` (Git submodule configuration)
8//!
9//! # Key Features
10//! - Parses INI-style `.gitmodules` format
11//! - Extracts submodule name, path, and URL
12//! - Generates purl for GitHub/GitLab URLs when possible
13//! - Reports submodules as dependencies
14//!
15//! # Implementation Notes
16//! - Git submodules are treated as dependencies of the containing repository
17//! - URLs are parsed to extract package name when possible
18//! - Supports both https and git@ URL formats
19
20use std::collections::HashMap;
21use std::path::Path;
22
23use crate::parser_warn as warn;
24
25use crate::models::{DatasourceId, Dependency, PackageData, PackageType};
26use crate::parsers::utils::{MAX_ITERATION_COUNT, read_file_to_string, truncate_field};
27
28use super::PackageParser;
29
30const PACKAGE_TYPE: PackageType = PackageType::Github;
31
32fn default_package_data() -> PackageData {
33    PackageData {
34        package_type: Some(PACKAGE_TYPE),
35        datasource_id: Some(DatasourceId::Gitmodules),
36        ..Default::default()
37    }
38}
39
40pub struct GitmodulesParser;
41
42impl PackageParser for GitmodulesParser {
43    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
44
45    fn is_match(path: &Path) -> bool {
46        path.file_name().is_some_and(|name| name == ".gitmodules")
47    }
48
49    fn extract_packages(path: &Path) -> Vec<PackageData> {
50        let content = match read_file_to_string(path, None) {
51            Ok(c) => c,
52            Err(e) => {
53                warn!("Failed to read .gitmodules {:?}: {}", path, e);
54                return vec![default_package_data()];
55            }
56        };
57
58        let submodules = parse_gitmodules(&content);
59        if submodules.is_empty() {
60            return vec![default_package_data()];
61        }
62
63        let dependencies: Vec<Dependency> = submodules
64            .into_iter()
65            .take(MAX_ITERATION_COUNT)
66            .map(|sub| Dependency {
67                purl: sub.purl.map(truncate_field),
68                extracted_requirement: Some(truncate_field(format!("{} at {}", sub.path, sub.url))),
69                scope: Some(truncate_field("runtime".to_string())),
70                is_runtime: Some(true),
71                is_optional: Some(false),
72                is_direct: Some(true),
73                resolved_package: None,
74                extra_data: None,
75                is_pinned: Some(false),
76            })
77            .collect();
78
79        vec![PackageData {
80            package_type: Some(PACKAGE_TYPE),
81            datasource_id: Some(DatasourceId::Gitmodules),
82            dependencies,
83            ..Default::default()
84        }]
85    }
86}
87
/// One `[submodule "..."]` entry collected from a `.gitmodules` file.
struct Submodule {
    /// Value of the section's `path` key; empty when the key is absent.
    path: String,
    /// Value of the section's `url` key; empty when the key is absent.
    url: String,
    /// Package URL derived from `url`, when the URL is a recognised
    /// GitHub or GitLab location.
    purl: Option<String>,
}
93
94fn parse_gitmodules(content: &str) -> Vec<Submodule> {
95    let mut submodules = Vec::new();
96    let mut current_section: Option<HashMap<String, String>> = None;
97    let mut current_name: Option<String> = None;
98
99    for line in content.lines().take(MAX_ITERATION_COUNT) {
100        let line = line.trim();
101
102        if line.is_empty() || line.starts_with('#') || line.starts_with(';') {
103            continue;
104        }
105
106        if line.starts_with('[') && line.ends_with(']') {
107            if let Some(section) = current_section.take()
108                && let Some(name) = current_name.take()
109                && let Some(submodule) = build_submodule(name, section)
110            {
111                submodules.push(submodule);
112            }
113
114            let section_name = &line[1..line.len() - 1];
115            if let Some(stripped) = section_name.strip_prefix("submodule ") {
116                current_name = Some(truncate_field(stripped.trim_matches('"').to_string()));
117                current_section = Some(HashMap::new());
118            }
119        } else if let Some(ref mut section) = current_section
120            && let Some((key, value)) = line.split_once('=')
121        {
122            let key = truncate_field(key.trim().to_string());
123            let value = truncate_field(value.trim().to_string());
124            section.insert(key, value);
125        }
126    }
127
128    if let Some(section) = current_section
129        && let Some(name) = current_name
130        && let Some(submodule) = build_submodule(name, section)
131    {
132        submodules.push(submodule);
133    }
134
135    submodules
136}
137
138fn build_submodule(_name: String, section: HashMap<String, String>) -> Option<Submodule> {
139    let path = truncate_field(section.get("path").cloned().unwrap_or_default());
140    let url = truncate_field(section.get("url").cloned().unwrap_or_default());
141
142    if path.is_empty() && url.is_empty() {
143        return None;
144    }
145
146    let purl = build_purl_from_url(&url);
147
148    Some(Submodule { path, url, purl })
149}
150
151fn build_purl_from_url(url: &str) -> Option<String> {
152    if url.is_empty() {
153        return None;
154    }
155
156    if let Some(purl) = parse_github_url(url) {
157        return Some(purl);
158    }
159
160    if let Some(purl) = parse_gitlab_url(url) {
161        return Some(purl);
162    }
163
164    None
165}
166
167fn parse_github_url(url: &str) -> Option<String> {
168    let (namespace, name) = if url.starts_with("https://github.com/") {
169        let path = url.strip_prefix("https://github.com/")?;
170        parse_repo_path(path)?
171    } else if url.starts_with("git@github.com:") {
172        let path = url.strip_prefix("git@github.com:")?;
173        parse_repo_path(path)?
174    } else {
175        return None;
176    };
177
178    Some(truncate_field(format!("pkg:github/{}/{}", namespace, name)))
179}
180
181fn parse_gitlab_url(url: &str) -> Option<String> {
182    let (namespace, name) = if url.starts_with("https://gitlab.com/") {
183        let path = url.strip_prefix("https://gitlab.com/")?;
184        parse_repo_path(path)?
185    } else if url.starts_with("git@gitlab.com:") {
186        let path = url.strip_prefix("git@gitlab.com:")?;
187        parse_repo_path(path)?
188    } else {
189        return None;
190    };
191
192    Some(truncate_field(format!("pkg:gitlab/{}/{}", namespace, name)))
193}
194
195fn parse_repo_path(path: &str) -> Option<(String, String)> {
196    let path = path.strip_suffix(".git").unwrap_or(path);
197    let parts: Vec<&str> = path.split('/').collect();
198
199    if parts.len() < 2 {
200        return None;
201    }
202
203    let name = truncate_field(parts.last()?.to_string());
204    let namespace = truncate_field(parts[..parts.len() - 1].join("/"));
205
206    if namespace.is_empty() || name.is_empty() {
207        return None;
208    }
209
210    Some((namespace, name))
211}
212
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Write;
    use tempfile::NamedTempFile;

    /// Writes `content` to a temporary file and runs the parser on it.
    ///
    /// The temporary file is kept alive until extraction has finished.
    fn extract_from(content: &str) -> Vec<PackageData> {
        let mut file = NamedTempFile::new().unwrap();
        file.write_all(content.as_bytes()).unwrap();
        GitmodulesParser::extract_packages(file.path())
    }

    #[test]
    fn test_is_match() {
        for accepted in [".gitmodules", "/path/to/.gitmodules"] {
            assert!(GitmodulesParser::is_match(Path::new(accepted)));
        }
        for rejected in ["gitmodules", ".gitmodules.bak"] {
            assert!(!GitmodulesParser::is_match(Path::new(rejected)));
        }
    }

    #[test]
    fn test_parse_single_submodule() {
        let pkgs = extract_from(
            r#"
[submodule "dep-lib"]
    path = lib/dep
    url = https://github.com/user/dep-lib.git
"#,
        );
        assert_eq!(pkgs.len(), 1);
        assert_eq!(pkgs[0].dependencies.len(), 1);
        let dep = &pkgs[0].dependencies[0];
        assert_eq!(dep.purl, Some("pkg:github/user/dep-lib".to_string()));
    }

    #[test]
    fn test_parse_multiple_submodules() {
        let pkgs = extract_from(
            r#"
[submodule "lib1"]
    path = libs/lib1
    url = https://github.com/org/lib1.git

[submodule "lib2"]
    path = libs/lib2
    url = git@github.com:org/lib2.git
"#,
        );
        assert_eq!(pkgs.len(), 1);
        assert_eq!(pkgs[0].dependencies.len(), 2);
    }

    #[test]
    fn test_parse_git_ssh_url() {
        let pkgs = extract_from(
            r#"
[submodule "private-repo"]
    path = private
    url = git@github.com:company/private-repo.git
"#,
        );
        let dep = &pkgs[0].dependencies[0];
        assert_eq!(
            dep.purl,
            Some("pkg:github/company/private-repo".to_string())
        );
    }

    #[test]
    fn test_parse_gitlab_url() {
        let pkgs = extract_from(
            r#"
[submodule "gitlab-dep"]
    path = gitlab-lib
    url = https://gitlab.com/group/project.git
"#,
        );
        let dep = &pkgs[0].dependencies[0];
        assert_eq!(dep.purl, Some("pkg:gitlab/group/project".to_string()));
    }

    #[test]
    fn test_parse_unknown_url() {
        let pkgs = extract_from(
            r#"
[submodule "custom"]
    path = custom
    url = https://example.com/repo.git
"#,
        );
        let dep = &pkgs[0].dependencies[0];
        assert!(dep.purl.is_none());
        // The raw URL must still be reported even though no purl was derived.
        let requirement = dep.extracted_requirement.as_ref().unwrap();
        assert!(requirement.contains("https://example.com/repo.git"));
    }

    #[test]
    fn test_parse_empty_file() {
        let pkgs = extract_from("");
        assert_eq!(pkgs.len(), 1);
        assert!(pkgs[0].dependencies.is_empty());
    }

    #[test]
    fn test_parse_with_comments() {
        let pkgs = extract_from(
            r#"
# This is a comment
[submodule "lib"]
    ; another comment
    path = lib
    url = https://github.com/user/lib.git
"#,
        );
        assert_eq!(pkgs[0].dependencies.len(), 1);
    }
}
338
// Declares this parser's metadata through the crate's `register_parser!`
// macro: a human-readable description, the glob patterns of the files it
// handles, and a link to the upstream format documentation.
// NOTE(review): the meaning of the third and fourth arguments (`"gitmodules"`
// and `""`) is defined by the macro, which is not visible here — confirm
// against its definition.
crate::register_parser!(
    "Git submodules manifest",
    &["**/.gitmodules"],
    "gitmodules",
    "",
    Some("https://git-scm.com/docs/gitmodules"),
);