Skip to main content

provenant/parsers/
gitmodules.rs

1// SPDX-FileCopyrightText: Provenant contributors
2// SPDX-License-Identifier: Apache-2.0
3
4//! Parser for Git submodule manifest files (`.gitmodules`).
5//!
6//! Extracts submodule dependencies from `.gitmodules` files, treating
7//! git submodules as package dependencies.
8//!
9//! # Supported Formats
10//! - `.gitmodules` (Git submodule configuration)
11//!
12//! # Key Features
13//! - Parses INI-style `.gitmodules` format
14//! - Extracts submodule name, path, and URL
15//! - Generates purl for GitHub/GitLab URLs when possible
16//! - Reports submodules as dependencies
17//!
18//! # Implementation Notes
19//! - Git submodules are treated as dependencies of the containing repository
20//! - URLs are parsed to extract package name when possible
21//! - Supports both https and git@ URL formats
22
23use std::collections::HashMap;
24use std::path::Path;
25
26use crate::parser_warn as warn;
27
28use crate::models::{DatasourceId, Dependency, PackageData, PackageType};
29use crate::parsers::utils::{MAX_ITERATION_COUNT, read_file_to_string, truncate_field};
30
31use super::PackageParser;
32use super::metadata::ParserMetadata;
33
34const PACKAGE_TYPE: PackageType = PackageType::Github;
35
36fn default_package_data() -> PackageData {
37    PackageData {
38        package_type: Some(PACKAGE_TYPE),
39        datasource_id: Some(DatasourceId::Gitmodules),
40        ..Default::default()
41    }
42}
43
44pub struct GitmodulesParser;
45
46impl PackageParser for GitmodulesParser {
47    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
48
49    fn metadata() -> Vec<ParserMetadata> {
50        vec![ParserMetadata {
51            description: "Git submodules manifest",
52            file_patterns: &["**/.gitmodules"],
53            package_type: "gitmodules",
54            primary_language: "",
55            documentation_url: Some("https://git-scm.com/docs/gitmodules"),
56        }]
57    }
58
59    fn is_match(path: &Path) -> bool {
60        path.file_name().is_some_and(|name| name == ".gitmodules")
61    }
62
63    fn extract_packages(path: &Path) -> Vec<PackageData> {
64        let content = match read_file_to_string(path, None) {
65            Ok(c) => c,
66            Err(e) => {
67                warn!("Failed to read .gitmodules {:?}: {}", path, e);
68                return vec![default_package_data()];
69            }
70        };
71
72        let submodules = parse_gitmodules(&content);
73        if submodules.is_empty() {
74            return vec![default_package_data()];
75        }
76
77        let dependencies: Vec<Dependency> = submodules
78            .into_iter()
79            .take(MAX_ITERATION_COUNT)
80            .map(|sub| Dependency {
81                purl: sub.purl.map(truncate_field),
82                extracted_requirement: Some(truncate_field(format!("{} at {}", sub.path, sub.url))),
83                scope: Some(truncate_field("runtime".to_string())),
84                is_runtime: Some(true),
85                is_optional: Some(false),
86                is_direct: Some(true),
87                resolved_package: None,
88                extra_data: None,
89                is_pinned: Some(false),
90            })
91            .collect();
92
93        vec![PackageData {
94            package_type: Some(PACKAGE_TYPE),
95            datasource_id: Some(DatasourceId::Gitmodules),
96            dependencies,
97            ..Default::default()
98        }]
99    }
100}
101
/// One `[submodule "..."]` entry read from a `.gitmodules` file.
#[derive(Debug, Clone, PartialEq, Eq)]
struct Submodule {
    /// Value of the section's `path` key; empty when the key is absent.
    path: String,
    /// Value of the section's `url` key; empty when the key is absent.
    url: String,
    /// Package URL derived from `url`, or `None` when the URL is not one of
    /// the recognised GitHub/GitLab forms.
    purl: Option<String>,
}
107
108fn parse_gitmodules(content: &str) -> Vec<Submodule> {
109    let mut submodules = Vec::new();
110    let mut current_section: Option<HashMap<String, String>> = None;
111    let mut current_name: Option<String> = None;
112
113    for line in content.lines().take(MAX_ITERATION_COUNT) {
114        let line = line.trim();
115
116        if line.is_empty() || line.starts_with('#') || line.starts_with(';') {
117            continue;
118        }
119
120        if line.starts_with('[') && line.ends_with(']') {
121            if let Some(section) = current_section.take()
122                && let Some(name) = current_name.take()
123                && let Some(submodule) = build_submodule(name, section)
124            {
125                submodules.push(submodule);
126            }
127
128            let section_name = &line[1..line.len() - 1];
129            if let Some(stripped) = section_name.strip_prefix("submodule ") {
130                current_name = Some(truncate_field(stripped.trim_matches('"').to_string()));
131                current_section = Some(HashMap::new());
132            }
133        } else if let Some(ref mut section) = current_section
134            && let Some((key, value)) = line.split_once('=')
135        {
136            let key = truncate_field(key.trim().to_string());
137            let value = truncate_field(value.trim().to_string());
138            section.insert(key, value);
139        }
140    }
141
142    if let Some(section) = current_section
143        && let Some(name) = current_name
144        && let Some(submodule) = build_submodule(name, section)
145    {
146        submodules.push(submodule);
147    }
148
149    submodules
150}
151
152fn build_submodule(_name: String, section: HashMap<String, String>) -> Option<Submodule> {
153    let path = truncate_field(section.get("path").cloned().unwrap_or_default());
154    let url = truncate_field(section.get("url").cloned().unwrap_or_default());
155
156    if path.is_empty() && url.is_empty() {
157        return None;
158    }
159
160    let purl = build_purl_from_url(&url);
161
162    Some(Submodule { path, url, purl })
163}
164
165fn build_purl_from_url(url: &str) -> Option<String> {
166    if url.is_empty() {
167        return None;
168    }
169
170    if let Some(purl) = parse_github_url(url) {
171        return Some(purl);
172    }
173
174    if let Some(purl) = parse_gitlab_url(url) {
175        return Some(purl);
176    }
177
178    None
179}
180
181fn parse_github_url(url: &str) -> Option<String> {
182    let (namespace, name) = if url.starts_with("https://github.com/") {
183        let path = url.strip_prefix("https://github.com/")?;
184        parse_repo_path(path)?
185    } else if url.starts_with("git@github.com:") {
186        let path = url.strip_prefix("git@github.com:")?;
187        parse_repo_path(path)?
188    } else {
189        return None;
190    };
191
192    Some(truncate_field(format!("pkg:github/{}/{}", namespace, name)))
193}
194
195fn parse_gitlab_url(url: &str) -> Option<String> {
196    let (namespace, name) = if url.starts_with("https://gitlab.com/") {
197        let path = url.strip_prefix("https://gitlab.com/")?;
198        parse_repo_path(path)?
199    } else if url.starts_with("git@gitlab.com:") {
200        let path = url.strip_prefix("git@gitlab.com:")?;
201        parse_repo_path(path)?
202    } else {
203        return None;
204    };
205
206    Some(truncate_field(format!("pkg:gitlab/{}/{}", namespace, name)))
207}
208
209fn parse_repo_path(path: &str) -> Option<(String, String)> {
210    let path = path.strip_suffix(".git").unwrap_or(path);
211    let parts: Vec<&str> = path.split('/').collect();
212
213    if parts.len() < 2 {
214        return None;
215    }
216
217    let name = truncate_field(parts.last()?.to_string());
218    let namespace = truncate_field(parts[..parts.len() - 1].join("/"));
219
220    if namespace.is_empty() || name.is_empty() {
221        return None;
222    }
223
224    Some((namespace, name))
225}
226
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Write;
    use tempfile::NamedTempFile;

    /// Writes `content` to a fresh temporary file and returns its handle.
    fn create_gitmodules_file(content: &str) -> NamedTempFile {
        let mut file = NamedTempFile::new().unwrap();
        file.write_all(content.as_bytes()).unwrap();
        file
    }

    /// Runs the parser over `content` via a temporary file.
    fn extract(content: &str) -> Vec<PackageData> {
        let file = create_gitmodules_file(content);
        GitmodulesParser::extract_packages(file.path())
    }

    #[test]
    fn test_is_match() {
        for accepted in [".gitmodules", "/path/to/.gitmodules"] {
            assert!(GitmodulesParser::is_match(Path::new(accepted)));
        }
        for rejected in ["gitmodules", ".gitmodules.bak"] {
            assert!(!GitmodulesParser::is_match(Path::new(rejected)));
        }
    }

    #[test]
    fn test_parse_single_submodule() {
        let packages = extract(
            r#"
[submodule "dep-lib"]
    path = lib/dep
    url = https://github.com/user/dep-lib.git
"#,
        );
        assert_eq!(packages.len(), 1);
        assert_eq!(packages[0].dependencies.len(), 1);
        assert_eq!(
            packages[0].dependencies[0].purl.as_deref(),
            Some("pkg:github/user/dep-lib")
        );
    }

    #[test]
    fn test_parse_multiple_submodules() {
        let packages = extract(
            r#"
[submodule "lib1"]
    path = libs/lib1
    url = https://github.com/org/lib1.git

[submodule "lib2"]
    path = libs/lib2
    url = git@github.com:org/lib2.git
"#,
        );
        assert_eq!(packages.len(), 1);
        assert_eq!(packages[0].dependencies.len(), 2);
    }

    #[test]
    fn test_parse_git_ssh_url() {
        let packages = extract(
            r#"
[submodule "private-repo"]
    path = private
    url = git@github.com:company/private-repo.git
"#,
        );
        assert_eq!(
            packages[0].dependencies[0].purl.as_deref(),
            Some("pkg:github/company/private-repo")
        );
    }

    #[test]
    fn test_parse_gitlab_url() {
        let packages = extract(
            r#"
[submodule "gitlab-dep"]
    path = gitlab-lib
    url = https://gitlab.com/group/project.git
"#,
        );
        assert_eq!(
            packages[0].dependencies[0].purl.as_deref(),
            Some("pkg:gitlab/group/project")
        );
    }

    #[test]
    fn test_parse_unknown_url() {
        let packages = extract(
            r#"
[submodule "custom"]
    path = custom
    url = https://example.com/repo.git
"#,
        );
        let dependency = &packages[0].dependencies[0];
        assert!(dependency.purl.is_none());

        let requirement = dependency.extracted_requirement.as_ref().unwrap();
        assert!(requirement.contains("https://example.com/repo.git"));
    }

    #[test]
    fn test_parse_empty_file() {
        let packages = extract("");
        assert_eq!(packages.len(), 1);
        assert!(packages[0].dependencies.is_empty());
    }

    #[test]
    fn test_parse_with_comments() {
        let packages = extract(
            r#"
# This is a comment
[submodule "lib"]
    ; another comment
    path = lib
    url = https://github.com/user/lib.git
"#,
        );
        assert_eq!(packages[0].dependencies.len(), 1);
    }
}
351}