Skip to main content

dupes_core/
extractor.rs

1use crate::code_unit::CodeUnitKind;
2use crate::node::{self, NodeKind, NormalizedNode};
3
4/// A sub-unit extracted from a normalized function body.
5pub struct SubUnit {
6    pub kind: CodeUnitKind,
7    pub node: NormalizedNode,
8    pub node_count: usize,
9    pub description: String,
10}
11
12/// Extract candidate sub-units from a normalized AST node.
13/// Walks the tree recursively and extracts natural compound structures
14/// (if branches, match arm bodies, loop bodies, closure bodies).
15/// Each sub-tree is re-indexed to canonical placeholder form.
16/// Only sub-trees meeting `min_node_count` are returned.
17#[must_use]
18pub fn extract_sub_units(node: &NormalizedNode, min_node_count: usize) -> Vec<SubUnit> {
19    let mut results = Vec::new();
20    extract_recursive(node, min_node_count, &mut results);
21    results
22}
23
24fn extract_recursive(node: &NormalizedNode, min_node_count: usize, results: &mut Vec<SubUnit>) {
25    match &node.kind {
26        // If -> [condition, then_branch, else_or_None]
27        NodeKind::If => {
28            if let Some(then_branch) = node.children.get(1) {
29                try_add(
30                    then_branch,
31                    CodeUnitKind::IfBranch,
32                    "if-then branch",
33                    min_node_count,
34                    results,
35                );
36            }
37            if let Some(else_br) = node.children.get(2)
38                && !else_br.is_none()
39            {
40                try_add(
41                    else_br,
42                    CodeUnitKind::IfBranch,
43                    "if-else branch",
44                    min_node_count,
45                    results,
46                );
47            }
48        }
49        // Match -> [expr, arm0, arm1, ...]
50        // Each arm is MatchArm -> [pattern, guard_or_None, body]
51        NodeKind::Match => {
52            for (i, arm) in node.children.iter().skip(1).enumerate() {
53                if let Some(body) = arm.children.get(2) {
54                    let desc = format!("match arm {}", i + 1);
55                    try_add(body, CodeUnitKind::MatchArm, &desc, min_node_count, results);
56                }
57            }
58        }
59        // Loop -> [body]
60        NodeKind::Loop => {
61            if let Some(body) = node.children.first() {
62                try_add(
63                    body,
64                    CodeUnitKind::LoopBody,
65                    "loop body",
66                    min_node_count,
67                    results,
68                );
69            }
70        }
71        // While -> [condition, body]
72        NodeKind::While => {
73            if let Some(body) = node.children.get(1) {
74                try_add(
75                    body,
76                    CodeUnitKind::LoopBody,
77                    "while body",
78                    min_node_count,
79                    results,
80                );
81            }
82        }
83        // ForLoop -> [pat, iter, body]
84        NodeKind::ForLoop => {
85            if let Some(body) = node.children.get(2) {
86                try_add(
87                    body,
88                    CodeUnitKind::LoopBody,
89                    "for body",
90                    min_node_count,
91                    results,
92                );
93            }
94        }
95        // Closure -> [body, param0, ...]
96        NodeKind::Closure => {
97            if let Some(body) = node.children.first() {
98                try_add(
99                    body,
100                    CodeUnitKind::Block,
101                    "closure body",
102                    min_node_count,
103                    results,
104                );
105            }
106        }
107        _ => {}
108    }
109
110    // Always recurse into all children
111    for child in &node.children {
112        extract_recursive(child, min_node_count, results);
113    }
114}
115
116fn try_add(
117    node: &NormalizedNode,
118    kind: CodeUnitKind,
119    description: &str,
120    min_node_count: usize,
121    results: &mut Vec<SubUnit>,
122) {
123    let reindexed = node::reindex_placeholders(node);
124    let node_count = node::count_nodes(&reindexed);
125    if node_count >= min_node_count {
126        results.push(SubUnit {
127            kind,
128            node: reindexed,
129            node_count,
130            description: description.to_string(),
131        });
132    }
133}