Skip to main content

yarli_cli/yarli-git/src/
submodule.rs

1//! Submodule policy enforcement (Section 12.4).
2//!
3//! Provides detection of uninitialized/dirty submodules and policy-based
4//! validation of submodule SHA changes across merges.
5
6use crate::yarli_core::entities::worktree_binding::SubmoduleMode;
7
8use crate::yarli_git::constants::*;
9use crate::yarli_git::error::GitError;
10
11/// A single entry from `git submodule status` output.
12#[derive(Debug, Clone, PartialEq, Eq)]
13pub struct SubmoduleEntry {
14    /// Status prefix: ' ' = ok, '-' = uninit, '+' = modified, 'U' = conflict.
15    pub status: SubmoduleStatus,
16    /// Current SHA of the submodule.
17    pub sha: String,
18    /// Path of the submodule relative to repo root.
19    pub path: String,
20    /// Optional branch/descriptor in parentheses (may be empty).
21    pub descriptor: Option<String>,
22}
23
/// State of one submodule, as encoded by the prefix character of its
/// `git submodule status` line.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum SubmoduleStatus {
    /// At the recorded SHA (space prefix, or no prefix at all).
    Current,
    /// Not initialized (`-` prefix).
    Uninitialized,
    /// Modified, i.e. checked out at a different SHA than recorded (`+` prefix).
    Modified,
    /// In a merge conflict (`U` prefix).
    Conflict,
}
36
37/// Parse `git submodule status` output into structured entries.
38///
39/// Format per line: `<prefix><sha> <path> (<descriptor>)`
40/// Prefix is one of: ' ', '-', '+', 'U'
41pub fn parse_submodule_status(stdout: &str) -> Vec<SubmoduleEntry> {
42    stdout
43        .lines()
44        .filter(|l| !l.trim().is_empty())
45        .filter_map(|line| {
46            // The line starts with an optional status char, then 40-char SHA, then space, then path.
47            // Examples:
48            //  abc123...  path/to/sub (v1.0)
49            // -abc123...  path/to/sub
50            // +abc123...  path/to/sub (v1.0-dirty)
51            // Uabc123...  path/to/sub
52            let trimmed = line.trim_start();
53            if trimmed.is_empty() {
54                return None;
55            }
56
57            let first = trimmed.chars().next()?;
58            let (status, rest) = match first {
59                c if c == SUBMODULE_UNINIT_PREFIX => {
60                    (SubmoduleStatus::Uninitialized, &trimmed[1..])
61                }
62                c if c == SUBMODULE_MODIFIED_PREFIX => (SubmoduleStatus::Modified, &trimmed[1..]),
63                c if c == SUBMODULE_CONFLICT_PREFIX => (SubmoduleStatus::Conflict, &trimmed[1..]),
64                c if c.is_ascii_hexdigit() => (SubmoduleStatus::Current, trimmed),
65                ' ' => (SubmoduleStatus::Current, &trimmed[1..]),
66                _ => return None,
67            };
68
69            // Extract SHA (up to first space).
70            let sha_end = rest.find(' ')?;
71            let sha = rest[..sha_end].to_string();
72            let after_sha = rest[sha_end..].trim_start();
73
74            // Extract path (up to optional parenthesized descriptor).
75            let (path, descriptor) = if let Some(paren_start) = after_sha.find(" (") {
76                let path = after_sha[..paren_start].to_string();
77                let desc = after_sha[paren_start + 2..]
78                    .trim_end_matches(')')
79                    .to_string();
80                (path, Some(desc))
81            } else {
82                (after_sha.to_string(), None)
83            };
84
85            Some(SubmoduleEntry {
86                status,
87                sha,
88                path,
89                descriptor,
90            })
91        })
92        .collect()
93}
94
95/// Find submodules that are uninitialized.
96pub fn find_uninitialized(entries: &[SubmoduleEntry]) -> Vec<&SubmoduleEntry> {
97    entries
98        .iter()
99        .filter(|e| e.status == SubmoduleStatus::Uninitialized)
100        .collect()
101}
102
103/// Find submodules that are dirty (modified or conflicted).
104pub fn find_dirty(entries: &[SubmoduleEntry]) -> Vec<&SubmoduleEntry> {
105    entries
106        .iter()
107        .filter(|e| e.status == SubmoduleStatus::Modified || e.status == SubmoduleStatus::Conflict)
108        .collect()
109}
110
111/// Check submodule policy between before/after states.
112///
113/// Compares submodule SHAs before and after an operation and validates
114/// against the configured policy mode.
115pub fn check_policy(
116    mode: SubmoduleMode,
117    before: &[SubmoduleEntry],
118    after: &[SubmoduleEntry],
119) -> Result<(), GitError> {
120    match mode {
121        SubmoduleMode::AllowAny => Ok(()),
122        SubmoduleMode::Locked => check_locked_policy(before, after),
123        SubmoduleMode::AllowFastForward => {
124            // AllowFastForward: SHAs may change but we can't verify FF without
125            // git ancestry checks. At the submodule status level we just check
126            // that no new uninitialized or conflicted submodules appeared.
127            // The actual FF verification requires per-submodule `git merge-base --is-ancestor`.
128            check_no_regressions(before, after)
129        }
130    }
131}
132
133/// Locked mode: no submodule SHAs may change.
134fn check_locked_policy(
135    before: &[SubmoduleEntry],
136    after: &[SubmoduleEntry],
137) -> Result<(), GitError> {
138    // Build a map of path -> sha from before.
139    let before_map: std::collections::HashMap<&str, &str> = before
140        .iter()
141        .map(|e| (e.path.as_str(), e.sha.as_str()))
142        .collect();
143
144    for entry in after {
145        if let Some(&before_sha) = before_map.get(entry.path.as_str()) {
146            if before_sha != entry.sha {
147                return Err(GitError::SubmodulePolicyViolation {
148                    path: entry.path.clone(),
149                    reason: format!(
150                        "SHA changed from {} to {} (Locked mode forbids changes)",
151                        &before_sha[..8.min(before_sha.len())],
152                        &entry.sha[..8.min(entry.sha.len())],
153                    ),
154                });
155            }
156        }
157        // New submodules appearing is also a change.
158        if !before_map.contains_key(entry.path.as_str()) {
159            return Err(GitError::SubmodulePolicyViolation {
160                path: entry.path.clone(),
161                reason: "new submodule added (Locked mode forbids changes)".into(),
162            });
163        }
164    }
165
166    // Check for removed submodules.
167    let after_paths: std::collections::HashSet<&str> =
168        after.iter().map(|e| e.path.as_str()).collect();
169    for entry in before {
170        if !after_paths.contains(entry.path.as_str()) {
171            return Err(GitError::SubmodulePolicyViolation {
172                path: entry.path.clone(),
173                reason: "submodule removed (Locked mode forbids changes)".into(),
174            });
175        }
176    }
177
178    Ok(())
179}
180
181/// Check that no submodules regressed to uninitialized or conflict state.
182fn check_no_regressions(
183    before: &[SubmoduleEntry],
184    after: &[SubmoduleEntry],
185) -> Result<(), GitError> {
186    let before_map: std::collections::HashMap<&str, &SubmoduleEntry> =
187        before.iter().map(|e| (e.path.as_str(), e)).collect();
188
189    for entry in after {
190        // A submodule that was initialized but is now uninitialized is a regression.
191        if entry.status == SubmoduleStatus::Uninitialized {
192            if let Some(before_entry) = before_map.get(entry.path.as_str()) {
193                if before_entry.status != SubmoduleStatus::Uninitialized {
194                    return Err(GitError::SubmodulePolicyViolation {
195                        path: entry.path.clone(),
196                        reason: "submodule became uninitialized".into(),
197                    });
198                }
199            }
200        }
201        // Conflicts are always a violation.
202        if entry.status == SubmoduleStatus::Conflict {
203            return Err(GitError::SubmodulePolicyViolation {
204                path: entry.path.clone(),
205                reason: "submodule has merge conflict".into(),
206            });
207        }
208    }
209
210    Ok(())
211}
212
#[cfg(test)]
mod tests {
    use super::*;

    // ── Fixtures ─────────────────────────────────────────────────────

    /// Build an entry without descriptor whose SHA is `sha_char` repeated 40 times.
    fn entry(status: SubmoduleStatus, sha_char: &str, path: &str) -> SubmoduleEntry {
        SubmoduleEntry {
            status,
            sha: sha_char.repeat(40),
            path: path.into(),
            descriptor: None,
        }
    }

    /// Assert that `result` is a policy violation and hand back its `(path, reason)`.
    fn expect_violation(result: Result<(), GitError>) -> (String, String) {
        assert!(result.is_err());
        match result.unwrap_err() {
            GitError::SubmodulePolicyViolation { path, reason } => (path, reason),
            other => panic!("expected SubmodulePolicyViolation, got {other:?}"),
        }
    }

    // ── Parsing tests ────────────────────────────────────────────────

    #[test]
    fn parse_empty_output() {
        assert!(parse_submodule_status("").is_empty());
    }

    #[test]
    fn parse_current_submodule() {
        let parsed =
            parse_submodule_status(" abc123def456789012345678901234567890ab vendor/lib (v1.0)\n");
        assert_eq!(parsed.len(), 1);
        let only = &parsed[0];
        assert_eq!(only.status, SubmoduleStatus::Current);
        assert_eq!(only.sha, "abc123def456789012345678901234567890ab");
        assert_eq!(only.path, "vendor/lib");
        assert_eq!(only.descriptor.as_deref(), Some("v1.0"));
    }

    #[test]
    fn parse_uninitialized_submodule() {
        let parsed =
            parse_submodule_status("-abc123def456789012345678901234567890ab vendor/uninitialized\n");
        assert_eq!(parsed.len(), 1);
        assert_eq!(parsed[0].status, SubmoduleStatus::Uninitialized);
        assert_eq!(parsed[0].path, "vendor/uninitialized");
    }

    #[test]
    fn parse_modified_submodule() {
        let parsed = parse_submodule_status(
            "+abc123def456789012345678901234567890ab vendor/dirty (v1.0-1-gabcdef)\n",
        );
        assert_eq!(parsed.len(), 1);
        assert_eq!(parsed[0].status, SubmoduleStatus::Modified);
        assert_eq!(parsed[0].path, "vendor/dirty");
        assert_eq!(parsed[0].descriptor.as_deref(), Some("v1.0-1-gabcdef"));
    }

    #[test]
    fn parse_conflict_submodule() {
        let parsed =
            parse_submodule_status("Uabc123def456789012345678901234567890ab vendor/conflict\n");
        assert_eq!(parsed.len(), 1);
        assert_eq!(parsed[0].status, SubmoduleStatus::Conflict);
        assert_eq!(parsed[0].path, "vendor/conflict");
    }

    #[test]
    fn parse_multiple_submodules() {
        let output = concat!(
            " abc123def456789012345678901234567890ab vendor/a (v1.0)\n",
            "-def456789012345678901234567890abcdef01 vendor/b\n",
            "+789012345678901234567890abcdef0123456789 vendor/c (v2.1-dirty)\n",
        );
        let parsed = parse_submodule_status(output);
        assert_eq!(parsed.len(), 3);
        assert_eq!(parsed[0].status, SubmoduleStatus::Current);
        assert_eq!(parsed[1].status, SubmoduleStatus::Uninitialized);
        assert_eq!(parsed[2].status, SubmoduleStatus::Modified);
    }

    #[test]
    fn parse_submodule_without_descriptor() {
        let parsed = parse_submodule_status(" abc123def456789012345678901234567890ab vendor/lib\n");
        assert_eq!(parsed.len(), 1);
        assert!(parsed[0].descriptor.is_none());
    }

    // ── Find helpers ─────────────────────────────────────────────────

    #[test]
    fn find_uninitialized_returns_only_uninit() {
        let entries = vec![
            entry(SubmoduleStatus::Current, "a", "vendor/a"),
            entry(SubmoduleStatus::Uninitialized, "b", "vendor/b"),
            entry(SubmoduleStatus::Modified, "c", "vendor/c"),
        ];
        let uninit = find_uninitialized(&entries);
        assert_eq!(uninit.len(), 1);
        assert_eq!(uninit[0].path, "vendor/b");
    }

    #[test]
    fn find_dirty_returns_modified_and_conflict() {
        let entries = vec![
            entry(SubmoduleStatus::Current, "a", "vendor/a"),
            entry(SubmoduleStatus::Modified, "b", "vendor/b"),
            entry(SubmoduleStatus::Conflict, "c", "vendor/c"),
        ];
        assert_eq!(find_dirty(&entries).len(), 2);
    }

    // ── Policy tests ─────────────────────────────────────────────────

    #[test]
    fn locked_policy_allows_no_changes() {
        let before = vec![entry(SubmoduleStatus::Current, "a", "vendor/lib")];
        let after = before.clone();
        assert!(check_policy(SubmoduleMode::Locked, &before, &after).is_ok());
    }

    #[test]
    fn locked_policy_rejects_sha_change() {
        let before = vec![entry(SubmoduleStatus::Current, "a", "vendor/lib")];
        let after = vec![entry(SubmoduleStatus::Current, "b", "vendor/lib")];
        let (path, reason) = expect_violation(check_policy(SubmoduleMode::Locked, &before, &after));
        assert_eq!(path, "vendor/lib");
        assert!(reason.contains("Locked mode"));
    }

    #[test]
    fn locked_policy_rejects_new_submodule() {
        let before: Vec<SubmoduleEntry> = Vec::new();
        let after = vec![entry(SubmoduleStatus::Current, "a", "vendor/new")];
        let (path, reason) = expect_violation(check_policy(SubmoduleMode::Locked, &before, &after));
        assert_eq!(path, "vendor/new");
        assert!(reason.contains("new submodule"));
    }

    #[test]
    fn locked_policy_rejects_removed_submodule() {
        let before = vec![entry(SubmoduleStatus::Current, "a", "vendor/gone")];
        let after: Vec<SubmoduleEntry> = Vec::new();
        let (path, reason) = expect_violation(check_policy(SubmoduleMode::Locked, &before, &after));
        assert_eq!(path, "vendor/gone");
        assert!(reason.contains("removed"));
    }

    #[test]
    fn allow_any_accepts_all_changes() {
        let before = vec![entry(SubmoduleStatus::Current, "a", "vendor/lib")];
        let after = vec![entry(SubmoduleStatus::Modified, "b", "vendor/lib")];
        assert!(check_policy(SubmoduleMode::AllowAny, &before, &after).is_ok());
    }

    #[test]
    fn allow_ff_rejects_conflict() {
        let before = vec![entry(SubmoduleStatus::Current, "a", "vendor/lib")];
        let after = vec![entry(SubmoduleStatus::Conflict, "a", "vendor/lib")];
        let (path, reason) =
            expect_violation(check_policy(SubmoduleMode::AllowFastForward, &before, &after));
        assert_eq!(path, "vendor/lib");
        assert!(reason.contains("conflict"));
    }

    #[test]
    fn allow_ff_rejects_regression_to_uninit() {
        let before = vec![entry(SubmoduleStatus::Current, "a", "vendor/lib")];
        let after = vec![entry(SubmoduleStatus::Uninitialized, "a", "vendor/lib")];
        let (path, reason) =
            expect_violation(check_policy(SubmoduleMode::AllowFastForward, &before, &after));
        assert_eq!(path, "vendor/lib");
        assert!(reason.contains("uninitialized"));
    }

    #[test]
    fn allow_ff_accepts_sha_change_without_regression() {
        let before = vec![entry(SubmoduleStatus::Current, "a", "vendor/lib")];
        let after = vec![entry(SubmoduleStatus::Modified, "b", "vendor/lib")];
        assert!(check_policy(SubmoduleMode::AllowFastForward, &before, &after).is_ok());
    }
}