Skip to main content

libverify_core/controls/
scoped_change.rs

1use crate::control::{Control, ControlFinding, ControlId, builtin};
2use crate::evidence::{EvidenceBundle, EvidenceState, GovernedChange};
3use crate::scope::{
4    FileRole, classify_file_role, classify_scope, extract_feature_namespace, is_non_code_file,
5    should_bridge_aux_to_source, should_bridge_colocated_sources, should_bridge_fork_variants,
6    should_bridge_patch_semantic_tokens, should_bridge_test_fixture_pair,
7};
8use crate::union_find::{NodeKind, UnionFind};
9use crate::verdict::Severity;
10
11/// Verifies that change request changes are well-scoped (single logical unit).
12pub struct ScopedChangeControl;
13
14impl Control for ScopedChangeControl {
15    fn id(&self) -> ControlId {
16        builtin::id(builtin::SCOPED_CHANGE)
17    }
18
19    fn description(&self) -> &'static str {
20        "Changes must be well-scoped (single logical unit)"
21    }
22
23    fn evaluate(&self, evidence: &EvidenceBundle) -> Vec<ControlFinding> {
24        if evidence.change_requests.is_empty() {
25            return vec![ControlFinding::not_applicable(
26                self.id(),
27                "No change requests found",
28            )];
29        }
30
31        evidence
32            .change_requests
33            .iter()
34            .map(|cr| evaluate_change(self.id(), cr))
35            .collect()
36    }
37}
38
/// Extract identifier tokens from a unified diff patch for semantic matching.
///
/// Only added (`+`) and removed (`-`) lines are scanned. Each such line is split
/// on every character that is neither alphanumeric nor `_`; a token is kept when
/// it is at least three bytes long and starts with an alphabetic character.
///
/// File header lines (`--- a/path`, `+++ b/path`) are ignored: they name the
/// file, not its content, and would otherwise leak path segments into the token
/// set. Headers are only recognised *outside* a hunk, so a changed line inside a
/// hunk that merely looks like a header (for example a removed `-- comment` line,
/// rendered as `--- comment`) is still scanned.
///
/// Returns the tokens sorted and de-duplicated.
fn extract_identifiers_from_patch(patch: &str) -> Vec<String> {
    /// Minimum token length, in bytes, for a word to count as an identifier.
    const MIN_IDENTIFIER_LEN: usize = 3;

    let mut ids: Vec<String> = Vec::new();
    // True once a `@@` hunk header has been seen for the current file section.
    let mut in_hunk = false;

    for line in patch.lines() {
        if line.starts_with("@@") {
            in_hunk = true;
            continue;
        }
        // Hunk body lines always start with ' ', '+', '-' or '\', so a line
        // starting with "diff " can only be the start of a new file section.
        if line.starts_with("diff ") {
            in_hunk = false;
            continue;
        }

        let content = match line.strip_prefix('+').or_else(|| line.strip_prefix('-')) {
            Some(rest) => rest,
            None => continue,
        };

        // Before the first hunk, `--- ` / `+++ ` introduce the old/new file names.
        if !in_hunk && (line.starts_with("--- ") || line.starts_with("+++ ")) {
            continue;
        }

        ids.extend(
            content
                .split(|c: char| !c.is_alphanumeric() && c != '_')
                .filter(|word| {
                    word.len() >= MIN_IDENTIFIER_LEN
                        && word.chars().next().is_some_and(|c| c.is_alphabetic())
                })
                .map(str::to_string),
        );
    }

    ids.sort_unstable();
    ids.dedup();
    ids
}
56
57fn evaluate_change(id: ControlId, cr: &GovernedChange) -> ControlFinding {
58    let cr_subject = cr.id.to_string();
59
60    let assets = match &cr.changed_assets {
61        EvidenceState::NotApplicable => {
62            return ControlFinding::not_applicable(id, "Changed assets not applicable");
63        }
64        EvidenceState::Missing { gaps } => {
65            return ControlFinding::indeterminate(
66                id,
67                "Changed assets evidence could not be collected",
68                Vec::new(),
69                gaps.clone(),
70            );
71        }
72        EvidenceState::Complete { value } | EvidenceState::Partial { value, .. } => value,
73    };
74
75    // Filter to code files with diffs
76    let code_files: Vec<_> = assets
77        .iter()
78        .filter(|a| !is_non_code_file(&a.path) && a.diff_available)
79        .collect();
80
81    // 0-1 code files: trivially scoped
82    if code_files.len() <= 1 {
83        return ControlFinding::satisfied(
84            id,
85            format!("{cr_subject}: change request is well-scoped"),
86            code_files.iter().map(|a| a.path.clone()).collect(),
87        );
88    }
89
90    // Build union-find graph
91    let mut graph = UnionFind::new();
92    let mut file_nodes = Vec::new();
93
94    for (idx, asset) in code_files.iter().enumerate() {
95        let node = graph.add_node(idx as u16, &asset.path, NodeKind::File);
96        file_nodes.push(node);
97    }
98
99    let aux_count = code_files
100        .iter()
101        .filter(|a| classify_file_role(&a.path) != FileRole::Source)
102        .count();
103    let source_count = code_files.len().saturating_sub(aux_count);
104
105    // Extract identifiers from patches for semantic token matching
106    let all_identifiers: Vec<Vec<String>> = code_files
107        .iter()
108        .map(|a| extract_identifiers_from_patch(a.diff.as_deref().unwrap_or("")))
109        .collect();
110
111    // Apply heuristic bridges
112    for i in 0..code_files.len() {
113        for j in (i + 1)..code_files.len() {
114            let path_a = &code_files[i].path;
115            let path_b = &code_files[j].path;
116
117            let should_merge = should_bridge_colocated_sources(path_a, path_b)
118                || should_bridge_aux_to_source(path_a, path_b, source_count, aux_count)
119                || should_bridge_aux_to_source(path_b, path_a, source_count, aux_count)
120                || should_bridge_fork_variants(path_a, path_b)
121                || should_bridge_test_fixture_pair(path_a, path_b)
122                || should_bridge_patch_semantic_tokens(
123                    path_a,
124                    path_b,
125                    &all_identifiers[i],
126                    &all_identifiers[j],
127                    source_count,
128                    aux_count,
129                );
130
131            if should_merge {
132                graph.merge(file_nodes[i], file_nodes[j]);
133            }
134        }
135    }
136
137    // Feature namespace bridging
138    if aux_count > 0 {
139        let paths: Vec<&str> = code_files.iter().map(|a| a.path.as_str()).collect();
140        if let Some(ns) = extract_feature_namespace(&paths)
141            && ns.member_indices.len() >= 2
142        {
143            let anchor = file_nodes[ns.member_indices[0]];
144            for &idx in &ns.member_indices[1..] {
145                graph.merge(anchor, file_nodes[idx]);
146            }
147        }
148    }
149
150    let components = graph.component_count();
151    let severity = classify_scope(code_files.len(), components);
152    let subjects: Vec<String> = code_files.iter().map(|a| a.path.clone()).collect();
153
154    match severity {
155        Severity::Pass => ControlFinding::satisfied(
156            id,
157            format!("{cr_subject}: change request is well-scoped"),
158            subjects,
159        ),
160        _ => {
161            let comp_groups = graph.get_components();
162            let mut detail = String::new();
163            for (comp_idx, group) in comp_groups.iter().enumerate() {
164                detail.push_str(&format!("  Component {}:", comp_idx + 1));
165                for &file_idx in group {
166                    detail.push_str(&format!(" {}", code_files[file_idx as usize].path));
167                }
168                detail.push('\n');
169            }
170            ControlFinding::violated(
171                id,
172                format!(
173                    "{cr_subject}: change request has {components} disconnected change clusters\n{detail}"
174                ),
175                subjects,
176            )
177        }
178    }
179}
180
#[cfg(test)]
mod tests {
    use super::*;
    use crate::control::ControlStatus;
    use crate::evidence::{ChangeRequestId, ChangedAsset};

    /// Builds a modified asset at `path` with a one-line diff attached.
    fn asset(path: &str) -> ChangedAsset {
        ChangedAsset {
            path: path.to_string(),
            diff_available: true,
            additions: 1,
            deletions: 0,
            status: "modified".to_string(),
            diff: Some("@@ -1 +1 @@\n+changed\n".to_string()),
        }
    }

    /// Wraps `assets` in a bundle holding a single change request whose
    /// changed-asset evidence is complete.
    fn bundle_with(assets: Vec<ChangedAsset>) -> EvidenceBundle {
        EvidenceBundle {
            change_requests: vec![GovernedChange {
                id: ChangeRequestId::new("test", "owner/repo#1"),
                title: "test".to_string(),
                summary: None,
                submitted_by: None,
                changed_assets: EvidenceState::complete(assets),
                approval_decisions: EvidenceState::not_applicable(),
                source_revisions: EvidenceState::not_applicable(),
                work_item_refs: EvidenceState::not_applicable(),
            }],
            ..Default::default()
        }
    }

    #[test]
    fn not_applicable_when_no_changes() {
        let findings = ScopedChangeControl.evaluate(&EvidenceBundle::default());
        assert_eq!(findings[0].status, ControlStatus::NotApplicable);
    }

    #[test]
    fn not_applicable_when_changed_assets_not_applicable() {
        let mut bundle = bundle_with(vec![asset("src/foo.rs")]);
        bundle.change_requests[0].changed_assets = EvidenceState::not_applicable();
        let findings = ScopedChangeControl.evaluate(&bundle);
        assert_eq!(findings.len(), 1);
        assert_eq!(findings[0].status, ControlStatus::NotApplicable);
    }

    #[test]
    fn satisfied_for_single_file() {
        let bundle = bundle_with(vec![asset("src/foo.rs")]);
        let findings = ScopedChangeControl.evaluate(&bundle);
        assert_eq!(findings[0].status, ControlStatus::Satisfied);
    }

    #[test]
    fn satisfied_for_connected_source_and_test() {
        let bundle = bundle_with(vec![asset("src/foo.rs"), asset("tests/foo_test.rs")]);
        let findings = ScopedChangeControl.evaluate(&bundle);
        assert_eq!(
            findings[0].status,
            ControlStatus::Satisfied,
            "source + test should be connected: {}",
            findings[0].rationale
        );
    }

    #[test]
    fn violated_for_disconnected_files() {
        let bundle = bundle_with(vec![
            asset("src/auth/login.rs"),
            asset("src/payment/checkout.rs"),
        ]);
        let findings = ScopedChangeControl.evaluate(&bundle);
        assert_eq!(
            findings[0].status,
            ControlStatus::Violated,
            "disconnected domains should be violated: {}",
            findings[0].rationale
        );
    }

    #[test]
    fn non_code_files_excluded() {
        let bundle = bundle_with(vec![asset("src/auth/login.rs"), asset("README.md")]);
        let findings = ScopedChangeControl.evaluate(&bundle);
        // Only one code file after filtering → satisfied
        assert_eq!(findings[0].status, ControlStatus::Satisfied);
    }

    #[test]
    fn extracts_sorted_unique_identifiers_from_changed_lines() {
        // Hunk header and context lines are ignored; short and numeric tokens are dropped.
        let patch = "@@ -1,2 +1,2 @@\n-let old_total = 1;\n+let new_total = 2;\n unchanged context\n+let new_total = 3;\n";
        assert_eq!(
            extract_identifiers_from_patch(patch),
            vec!["let", "new_total", "old_total"]
        );
    }
}