Skip to main content

provenant/parsers/
gitmodules.rs

1//! Parser for Git submodule manifest files (`.gitmodules`).
2//!
3//! Extracts submodule dependencies from `.gitmodules` files, treating
4//! git submodules as package dependencies.
5//!
6//! # Supported Formats
7//! - `.gitmodules` (Git submodule configuration)
8//!
9//! # Key Features
10//! - Parses INI-style `.gitmodules` format
11//! - Extracts submodule name, path, and URL
12//! - Generates purl for GitHub/GitLab URLs when possible
13//! - Reports submodules as dependencies
14//!
15//! # Implementation Notes
16//! - Git submodules are treated as dependencies of the containing repository
17//! - URLs are parsed to extract package name when possible
18//! - Supports both https and git@ URL formats
19
20use std::collections::HashMap;
21use std::path::Path;
22
23use log::warn;
24
25use crate::models::{DatasourceId, Dependency, PackageData, PackageType};
26use crate::parsers::utils::read_file_to_string;
27
28use super::PackageParser;
29
/// Package type attached to every `PackageData` produced by this parser and
/// exposed as the parser's `PackageParser::PACKAGE_TYPE`.
const PACKAGE_TYPE: PackageType = PackageType::Github;
31
32fn default_package_data() -> PackageData {
33    PackageData {
34        package_type: Some(PACKAGE_TYPE),
35        datasource_id: Some(DatasourceId::Gitmodules),
36        ..Default::default()
37    }
38}
39
40pub struct GitmodulesParser;
41
42impl PackageParser for GitmodulesParser {
43    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
44
45    fn is_match(path: &Path) -> bool {
46        path.file_name().is_some_and(|name| name == ".gitmodules")
47    }
48
49    fn extract_packages(path: &Path) -> Vec<PackageData> {
50        let content = match read_file_to_string(path) {
51            Ok(c) => c,
52            Err(e) => {
53                warn!("Failed to read .gitmodules {:?}: {}", path, e);
54                return vec![default_package_data()];
55            }
56        };
57
58        let submodules = parse_gitmodules(&content);
59        if submodules.is_empty() {
60            return vec![default_package_data()];
61        }
62
63        let dependencies: Vec<Dependency> = submodules
64            .into_iter()
65            .map(|sub| Dependency {
66                purl: sub.purl,
67                extracted_requirement: Some(format!("{} at {}", sub.path, sub.url)),
68                scope: Some("runtime".to_string()),
69                is_runtime: Some(true),
70                is_optional: Some(false),
71                is_direct: Some(true),
72                resolved_package: None,
73                extra_data: None,
74                is_pinned: Some(false),
75            })
76            .collect();
77
78        vec![PackageData {
79            package_type: Some(PACKAGE_TYPE),
80            datasource_id: Some(DatasourceId::Gitmodules),
81            dependencies,
82            ..Default::default()
83        }]
84    }
85}
86
/// One `[submodule "<name>"]` entry read from a `.gitmodules` file.
struct Submodule {
    /// Name taken from the section header. It is stored but not read by the
    /// code visible in this file, hence the `dead_code` allowance.
    #[allow(dead_code)]
    name: String,
    /// Value of the section's `path` key; empty when the key is absent.
    path: String,
    /// Value of the section's `url` key; empty when the key is absent.
    url: String,
    /// Package URL derived from `url`, or `None` when the URL is empty or not
    /// recognised as a GitHub/GitLab repository.
    purl: Option<String>,
}
94
95fn parse_gitmodules(content: &str) -> Vec<Submodule> {
96    let mut submodules = Vec::new();
97    let mut current_section: Option<HashMap<String, String>> = None;
98    let mut current_name: Option<String> = None;
99
100    for line in content.lines() {
101        let line = line.trim();
102
103        if line.is_empty() || line.starts_with('#') || line.starts_with(';') {
104            continue;
105        }
106
107        if line.starts_with('[') && line.ends_with(']') {
108            if let Some(section) = current_section.take()
109                && let Some(name) = current_name.take()
110                && let Some(submodule) = build_submodule(name, section)
111            {
112                submodules.push(submodule);
113            }
114
115            let section_name = &line[1..line.len() - 1];
116            if let Some(stripped) = section_name.strip_prefix("submodule ") {
117                current_name = Some(stripped.trim_matches('"').to_string());
118                current_section = Some(HashMap::new());
119            }
120        } else if let Some(ref mut section) = current_section
121            && let Some((key, value)) = line.split_once('=')
122        {
123            let key = key.trim().to_string();
124            let value = value.trim().to_string();
125            section.insert(key, value);
126        }
127    }
128
129    if let Some(section) = current_section
130        && let Some(name) = current_name
131        && let Some(submodule) = build_submodule(name, section)
132    {
133        submodules.push(submodule);
134    }
135
136    submodules
137}
138
139fn build_submodule(name: String, section: HashMap<String, String>) -> Option<Submodule> {
140    let path = section.get("path").cloned().unwrap_or_default();
141    let url = section.get("url").cloned().unwrap_or_default();
142
143    if path.is_empty() && url.is_empty() {
144        return None;
145    }
146
147    let purl = build_purl_from_url(&url);
148
149    Some(Submodule {
150        name,
151        path,
152        url,
153        purl,
154    })
155}
156
157fn build_purl_from_url(url: &str) -> Option<String> {
158    if url.is_empty() {
159        return None;
160    }
161
162    if let Some(purl) = parse_github_url(url) {
163        return Some(purl);
164    }
165
166    if let Some(purl) = parse_gitlab_url(url) {
167        return Some(purl);
168    }
169
170    None
171}
172
173fn parse_github_url(url: &str) -> Option<String> {
174    let (namespace, name) = if url.starts_with("https://github.com/") {
175        let path = url.strip_prefix("https://github.com/")?;
176        parse_repo_path(path)?
177    } else if url.starts_with("git@github.com:") {
178        let path = url.strip_prefix("git@github.com:")?;
179        parse_repo_path(path)?
180    } else {
181        return None;
182    };
183
184    Some(format!("pkg:github/{}/{}", namespace, name))
185}
186
187fn parse_gitlab_url(url: &str) -> Option<String> {
188    let (namespace, name) = if url.starts_with("https://gitlab.com/") {
189        let path = url.strip_prefix("https://gitlab.com/")?;
190        parse_repo_path(path)?
191    } else if url.starts_with("git@gitlab.com:") {
192        let path = url.strip_prefix("git@gitlab.com:")?;
193        parse_repo_path(path)?
194    } else {
195        return None;
196    };
197
198    Some(format!("pkg:gitlab/{}/{}", namespace, name))
199}
200
/// Splits the path part of a repository URL into `(namespace, name)`.
///
/// Trailing slashes and a trailing `.git` suffix are removed first, and empty
/// path segments (as in `user//repo`) are ignored. The last remaining segment
/// is the repository name; all preceding segments, joined with `/`, form the
/// namespace (so nested groups such as `group/sub/project` are preserved).
///
/// Returns `None` when fewer than two non-empty segments remain.
fn parse_repo_path(path: &str) -> Option<(String, String)> {
    // `https://host/user/repo/` and `.../repo.git/` are valid clone URLs, so
    // drop trailing slashes before looking for the `.git` suffix.
    let trimmed = path.trim_end_matches('/');
    let trimmed = trimmed.strip_suffix(".git").unwrap_or(trimmed);

    let segments: Vec<&str> = trimmed
        .split('/')
        .filter(|segment| !segment.is_empty())
        .collect();

    let (name, namespace) = segments.split_last()?;
    if namespace.is_empty() {
        return None;
    }

    Some((namespace.join("/"), (*name).to_owned()))
}
218
// Unit tests driving the parser end-to-end through temporary files.
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Write;
    use tempfile::NamedTempFile;

    // Writes `content` to a fresh temporary file. The file is not named
    // `.gitmodules`; the tests call `extract_packages` directly, which does
    // not check the file name.
    fn create_gitmodules_file(content: &str) -> NamedTempFile {
        let mut file = NamedTempFile::new().unwrap();
        file.write_all(content.as_bytes()).unwrap();
        file
    }

    // Only a final path component of exactly `.gitmodules` matches.
    #[test]
    fn test_is_match() {
        assert!(GitmodulesParser::is_match(Path::new(".gitmodules")));
        assert!(GitmodulesParser::is_match(Path::new(
            "/path/to/.gitmodules"
        )));
        assert!(!GitmodulesParser::is_match(Path::new("gitmodules")));
        assert!(!GitmodulesParser::is_match(Path::new(".gitmodules.bak")));
    }

    // One submodule with an https GitHub URL yields one dependency with a github purl.
    #[test]
    fn test_parse_single_submodule() {
        let content = r#"
[submodule "dep-lib"]
    path = lib/dep
    url = https://github.com/user/dep-lib.git
"#;
        let file = create_gitmodules_file(content);
        let pkgs = GitmodulesParser::extract_packages(file.path());
        assert_eq!(pkgs.len(), 1);
        assert_eq!(pkgs[0].dependencies.len(), 1);
        let dep = &pkgs[0].dependencies[0];
        assert_eq!(dep.purl, Some("pkg:github/user/dep-lib".to_string()));
    }

    // Several sections are all reported on a single package record.
    #[test]
    fn test_parse_multiple_submodules() {
        let content = r#"
[submodule "lib1"]
    path = libs/lib1
    url = https://github.com/org/lib1.git

[submodule "lib2"]
    path = libs/lib2
    url = git@github.com:org/lib2.git
"#;
        let file = create_gitmodules_file(content);
        let pkgs = GitmodulesParser::extract_packages(file.path());
        assert_eq!(pkgs.len(), 1);
        assert_eq!(pkgs[0].dependencies.len(), 2);
    }

    // The scp-like `git@github.com:` form maps to the same github purl shape.
    #[test]
    fn test_parse_git_ssh_url() {
        let content = r#"
[submodule "private-repo"]
    path = private
    url = git@github.com:company/private-repo.git
"#;
        let file = create_gitmodules_file(content);
        let pkgs = GitmodulesParser::extract_packages(file.path());
        let dep = &pkgs[0].dependencies[0];
        assert_eq!(
            dep.purl,
            Some("pkg:github/company/private-repo".to_string())
        );
    }

    // gitlab.com URLs produce a gitlab purl.
    #[test]
    fn test_parse_gitlab_url() {
        let content = r#"
[submodule "gitlab-dep"]
    path = gitlab-lib
    url = https://gitlab.com/group/project.git
"#;
        let file = create_gitmodules_file(content);
        let pkgs = GitmodulesParser::extract_packages(file.path());
        let dep = &pkgs[0].dependencies[0];
        assert_eq!(dep.purl, Some("pkg:gitlab/group/project".to_string()));
    }

    // Unrecognised hosts yield no purl, but the raw URL is still kept in
    // `extracted_requirement`.
    #[test]
    fn test_parse_unknown_url() {
        let content = r#"
[submodule "custom"]
    path = custom
    url = https://example.com/repo.git
"#;
        let file = create_gitmodules_file(content);
        let pkgs = GitmodulesParser::extract_packages(file.path());
        let dep = &pkgs[0].dependencies[0];
        assert!(dep.purl.is_none());
        assert!(
            dep.extracted_requirement
                .as_ref()
                .unwrap()
                .contains("https://example.com/repo.git")
        );
    }

    // An empty file still yields one package record, without dependencies.
    #[test]
    fn test_parse_empty_file() {
        let content = "";
        let file = create_gitmodules_file(content);
        let pkgs = GitmodulesParser::extract_packages(file.path());
        assert_eq!(pkgs.len(), 1);
        assert!(pkgs[0].dependencies.is_empty());
    }

    // `#` and `;` comment lines are skipped, both outside and inside a section.
    #[test]
    fn test_parse_with_comments() {
        let content = r#"
# This is a comment
[submodule "lib"]
    ; another comment
    path = lib
    url = https://github.com/user/lib.git
"#;
        let file = create_gitmodules_file(content);
        let pkgs = GitmodulesParser::extract_packages(file.path());
        assert_eq!(pkgs[0].dependencies.len(), 1);
    }
}
344
// Parser metadata passed to `register_parser!`: a human-readable description,
// the glob pattern(s) for `.gitmodules` files, and a link to the upstream
// format documentation.
// NOTE(review): the meaning of the `"gitmodules"` and `""` arguments is
// defined by the `register_parser!` macro, which is not visible in this file —
// confirm against the macro definition before changing them.
crate::register_parser!(
    "Git submodules manifest",
    &["**/.gitmodules"],
    "gitmodules",
    "",
    Some("https://git-scm.com/docs/gitmodules"),
);