Skip to main content

provenant/parsers/
gitmodules.rs

1// SPDX-FileCopyrightText: Provenant contributors
2// SPDX-License-Identifier: Apache-2.0
3
4//! Parser for Git submodule manifest files (`.gitmodules`).
5//!
6//! Extracts submodule dependencies from `.gitmodules` files, treating
7//! git submodules as package dependencies.
8//!
9//! # Supported Formats
10//! - `.gitmodules` (Git submodule configuration)
11//!
12//! # Key Features
13//! - Parses INI-style `.gitmodules` format
14//! - Extracts submodule name, path, and URL
15//! - Generates purl for GitHub/GitLab URLs when possible
16//! - Reports submodules as dependencies
17//!
18//! # Implementation Notes
19//! - Git submodules are treated as dependencies of the containing repository
20//! - URLs are parsed to extract package name when possible
21//! - Supports both https and git@ URL formats
22
23use std::collections::HashMap;
24use std::path::Path;
25
26use crate::parser_warn as warn;
27
28use crate::models::{DatasourceId, Dependency, PackageData, PackageType};
29use crate::parsers::utils::{MAX_ITERATION_COUNT, read_file_to_string, truncate_field};
30
31use super::PackageParser;
32
33const PACKAGE_TYPE: PackageType = PackageType::Github;
34
35fn default_package_data() -> PackageData {
36    PackageData {
37        package_type: Some(PACKAGE_TYPE),
38        datasource_id: Some(DatasourceId::Gitmodules),
39        ..Default::default()
40    }
41}
42
43pub struct GitmodulesParser;
44
45impl PackageParser for GitmodulesParser {
46    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
47
48    fn is_match(path: &Path) -> bool {
49        path.file_name().is_some_and(|name| name == ".gitmodules")
50    }
51
52    fn extract_packages(path: &Path) -> Vec<PackageData> {
53        let content = match read_file_to_string(path, None) {
54            Ok(c) => c,
55            Err(e) => {
56                warn!("Failed to read .gitmodules {:?}: {}", path, e);
57                return vec![default_package_data()];
58            }
59        };
60
61        let submodules = parse_gitmodules(&content);
62        if submodules.is_empty() {
63            return vec![default_package_data()];
64        }
65
66        let dependencies: Vec<Dependency> = submodules
67            .into_iter()
68            .take(MAX_ITERATION_COUNT)
69            .map(|sub| Dependency {
70                purl: sub.purl.map(truncate_field),
71                extracted_requirement: Some(truncate_field(format!("{} at {}", sub.path, sub.url))),
72                scope: Some(truncate_field("runtime".to_string())),
73                is_runtime: Some(true),
74                is_optional: Some(false),
75                is_direct: Some(true),
76                resolved_package: None,
77                extra_data: None,
78                is_pinned: Some(false),
79            })
80            .collect();
81
82        vec![PackageData {
83            package_type: Some(PACKAGE_TYPE),
84            datasource_id: Some(DatasourceId::Gitmodules),
85            dependencies,
86            ..Default::default()
87        }]
88    }
89}
90
/// One `[submodule "<name>"]` entry read from a `.gitmodules` file.
///
/// The common traits are derived so values can be printed, cloned and
/// compared in diagnostics and tests.
#[derive(Debug, Clone, PartialEq, Eq)]
struct Submodule {
    /// Value of the section's `path` key (empty when the key is absent).
    path: String,
    /// Value of the section's `url` key (empty when the key is absent).
    url: String,
    /// Package URL derived from `url`; `None` when no purl could be built.
    purl: Option<String>,
}
96
97fn parse_gitmodules(content: &str) -> Vec<Submodule> {
98    let mut submodules = Vec::new();
99    let mut current_section: Option<HashMap<String, String>> = None;
100    let mut current_name: Option<String> = None;
101
102    for line in content.lines().take(MAX_ITERATION_COUNT) {
103        let line = line.trim();
104
105        if line.is_empty() || line.starts_with('#') || line.starts_with(';') {
106            continue;
107        }
108
109        if line.starts_with('[') && line.ends_with(']') {
110            if let Some(section) = current_section.take()
111                && let Some(name) = current_name.take()
112                && let Some(submodule) = build_submodule(name, section)
113            {
114                submodules.push(submodule);
115            }
116
117            let section_name = &line[1..line.len() - 1];
118            if let Some(stripped) = section_name.strip_prefix("submodule ") {
119                current_name = Some(truncate_field(stripped.trim_matches('"').to_string()));
120                current_section = Some(HashMap::new());
121            }
122        } else if let Some(ref mut section) = current_section
123            && let Some((key, value)) = line.split_once('=')
124        {
125            let key = truncate_field(key.trim().to_string());
126            let value = truncate_field(value.trim().to_string());
127            section.insert(key, value);
128        }
129    }
130
131    if let Some(section) = current_section
132        && let Some(name) = current_name
133        && let Some(submodule) = build_submodule(name, section)
134    {
135        submodules.push(submodule);
136    }
137
138    submodules
139}
140
141fn build_submodule(_name: String, section: HashMap<String, String>) -> Option<Submodule> {
142    let path = truncate_field(section.get("path").cloned().unwrap_or_default());
143    let url = truncate_field(section.get("url").cloned().unwrap_or_default());
144
145    if path.is_empty() && url.is_empty() {
146        return None;
147    }
148
149    let purl = build_purl_from_url(&url);
150
151    Some(Submodule { path, url, purl })
152}
153
154fn build_purl_from_url(url: &str) -> Option<String> {
155    if url.is_empty() {
156        return None;
157    }
158
159    if let Some(purl) = parse_github_url(url) {
160        return Some(purl);
161    }
162
163    if let Some(purl) = parse_gitlab_url(url) {
164        return Some(purl);
165    }
166
167    None
168}
169
170fn parse_github_url(url: &str) -> Option<String> {
171    let (namespace, name) = if url.starts_with("https://github.com/") {
172        let path = url.strip_prefix("https://github.com/")?;
173        parse_repo_path(path)?
174    } else if url.starts_with("git@github.com:") {
175        let path = url.strip_prefix("git@github.com:")?;
176        parse_repo_path(path)?
177    } else {
178        return None;
179    };
180
181    Some(truncate_field(format!("pkg:github/{}/{}", namespace, name)))
182}
183
184fn parse_gitlab_url(url: &str) -> Option<String> {
185    let (namespace, name) = if url.starts_with("https://gitlab.com/") {
186        let path = url.strip_prefix("https://gitlab.com/")?;
187        parse_repo_path(path)?
188    } else if url.starts_with("git@gitlab.com:") {
189        let path = url.strip_prefix("git@gitlab.com:")?;
190        parse_repo_path(path)?
191    } else {
192        return None;
193    };
194
195    Some(truncate_field(format!("pkg:gitlab/{}/{}", namespace, name)))
196}
197
198fn parse_repo_path(path: &str) -> Option<(String, String)> {
199    let path = path.strip_suffix(".git").unwrap_or(path);
200    let parts: Vec<&str> = path.split('/').collect();
201
202    if parts.len() < 2 {
203        return None;
204    }
205
206    let name = truncate_field(parts.last()?.to_string());
207    let namespace = truncate_field(parts[..parts.len() - 1].join("/"));
208
209    if namespace.is_empty() || name.is_empty() {
210        return None;
211    }
212
213    Some((namespace, name))
214}
215
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Write;
    use tempfile::NamedTempFile;

    /// Writes `content` to a fresh temporary file and returns its handle.
    fn create_gitmodules_file(content: &str) -> NamedTempFile {
        let mut file = NamedTempFile::new().unwrap();
        file.write_all(content.as_bytes()).unwrap();
        file
    }

    /// Runs the parser over `content` through a temporary file.
    fn extract(content: &str) -> Vec<PackageData> {
        let file = create_gitmodules_file(content);
        GitmodulesParser::extract_packages(file.path())
    }

    #[test]
    fn test_is_match() {
        let cases = [
            (".gitmodules", true),
            ("/path/to/.gitmodules", true),
            ("gitmodules", false),
            (".gitmodules.bak", false),
        ];

        for (candidate, expected) in cases {
            assert_eq!(
                GitmodulesParser::is_match(Path::new(candidate)),
                expected,
                "unexpected match result for {candidate}"
            );
        }
    }

    #[test]
    fn test_parse_single_submodule() {
        let packages = extract(
            r#"
[submodule "dep-lib"]
    path = lib/dep
    url = https://github.com/user/dep-lib.git
"#,
        );

        assert_eq!(packages.len(), 1);
        assert_eq!(packages[0].dependencies.len(), 1);
        assert_eq!(
            packages[0].dependencies[0].purl.as_deref(),
            Some("pkg:github/user/dep-lib")
        );
    }

    #[test]
    fn test_parse_multiple_submodules() {
        let packages = extract(
            r#"
[submodule "lib1"]
    path = libs/lib1
    url = https://github.com/org/lib1.git

[submodule "lib2"]
    path = libs/lib2
    url = git@github.com:org/lib2.git
"#,
        );

        assert_eq!(packages.len(), 1);
        assert_eq!(packages[0].dependencies.len(), 2);
    }

    #[test]
    fn test_parse_git_ssh_url() {
        let packages = extract(
            r#"
[submodule "private-repo"]
    path = private
    url = git@github.com:company/private-repo.git
"#,
        );

        assert_eq!(
            packages[0].dependencies[0].purl.as_deref(),
            Some("pkg:github/company/private-repo")
        );
    }

    #[test]
    fn test_parse_gitlab_url() {
        let packages = extract(
            r#"
[submodule "gitlab-dep"]
    path = gitlab-lib
    url = https://gitlab.com/group/project.git
"#,
        );

        assert_eq!(
            packages[0].dependencies[0].purl.as_deref(),
            Some("pkg:gitlab/group/project")
        );
    }

    #[test]
    fn test_parse_unknown_url() {
        let packages = extract(
            r#"
[submodule "custom"]
    path = custom
    url = https://example.com/repo.git
"#,
        );

        let dependency = &packages[0].dependencies[0];
        assert!(dependency.purl.is_none());

        let requirement = dependency.extracted_requirement.as_deref().unwrap();
        assert!(requirement.contains("https://example.com/repo.git"));
    }

    #[test]
    fn test_parse_empty_file() {
        let packages = extract("");

        assert_eq!(packages.len(), 1);
        assert!(packages[0].dependencies.is_empty());
    }

    #[test]
    fn test_parse_with_comments() {
        let packages = extract(
            r#"
# This is a comment
[submodule "lib"]
    ; another comment
    path = lib
    url = https://github.com/user/lib.git
"#,
        );

        assert_eq!(packages[0].dependencies.len(), 1);
    }
}
341
// Invokes the crate's `register_parser!` macro with this parser's metadata.
// NOTE(review): the argument meanings are presumed from their values (a
// description, path glob patterns, the package type name, an empty fourth
// field, and a documentation URL) — confirm against the macro definition.
crate::register_parser!(
    "Git submodules manifest",
    &["**/.gitmodules"],
    "gitmodules",
    "",
    Some("https://git-scm.com/docs/gitmodules"),
);