fast_nnt/algorithms/
convex_hull.rs

1use anyhow::{Result, anyhow};
2use fixedbitset::FixedBitSet;
3use petgraph::stable_graph::{EdgeIndex, NodeIndex};
4use std::collections::{HashMap, HashSet};
5
6use crate::phylo::phylo_graph::PhyloGraph;
7use crate::phylo::phylo_splits_graph::PhyloSplitsGraph;
8use crate::splits::asplit::ASplit;
9
10/* ------------------------------------------------------------------------- */
11/* Convex hull network construction (rotation/embedding aware)               */
12/* ------------------------------------------------------------------------- */
13
14/// Apply the convex hull algorithm to build a split network from `splits`.
15/// `n_tax` is the number of taxa (1-based semantics).
16/// `splits` is 0-based but we treat ids as 1..=nsplits in bitsets and logs.
17pub fn convex_hull_apply(
18    n_tax: usize,
19    taxa_labels: &[String],
20    splits: &[ASplit],
21    graph: &mut PhyloSplitsGraph,
22) -> Result<()> {
23    let mut used = FixedBitSet::with_capacity(splits.len() + 1);
24    convex_hull_apply_with_used(n_tax, taxa_labels, splits, graph, &mut used)
25}
26
27/// Same as `convex_hull_apply` but reuses `used_splits` (1-based bits).
28pub fn convex_hull_apply_with_used(
29    n_tax: usize,
30    taxa_labels: &[String],
31    splits: &[ASplit],
32    graph: &mut PhyloSplitsGraph,
33    used_splits: &mut FixedBitSet,
34) -> Result<()> {
35    // finish early if everything is already used
36    if used_splits.count_ones(..) == splits.len() {
37        return Ok(());
38    }
39    used_splits.grow(splits.len() + 1);
40
41    // Initialize graph if empty: one node holding all taxa
42    if graph.base.graph.node_count() == 0 {
43        let v = graph.base.new_node();
44        for t in 1..=n_tax {
45            graph.base.add_taxon(v, t);
46        }
47    } else {
48        // lightweight sanity (optional)
49        for t in 1..=n_tax {
50            if graph.base.get_taxon_node(t).is_none() {
51                log::warn!("ConvexHull: taxon {} has no node", t);
52            }
53        }
54        for e in graph.base.graph.edge_indices() {
55            if graph.get_split(e) == 0 {
56                log::warn!("ConvexHull: edge {:?} has no split id", e);
57            }
58        }
59    }
60
61    // Process splits in increasing size order (ties by id), skipping ones already used
62    let order = get_order_to_process_splits_in(splits, used_splits);
63
64    for &sid in &order {
65        let s = &splits[sid - 1];
66        let part_a = s.get_a(); // 1-based membership of side A
67
68        // Marking per node: 0 => in B, 1 => in A, 2 => intersection A∩B
69        let mut hulls: HashMap<NodeIndex, i32> = HashMap::new();
70        let mut intersections: Vec<NodeIndex> = Vec::new();
71
72        // Determine which already-placed splits divide each side
73        let mut splits0 = FixedBitSet::with_capacity(splits.len() + 1);
74        let mut splits1 = FixedBitSet::with_capacity(splits.len() + 1);
75
76        for i in 1..=splits.len() {
77            if !used_splits.contains(i) {
78                continue;
79            }
80            // splits that cut the complement of A (side 0)
81            if intersect2_cardinality(s, false, &splits[i - 1], true) > 0
82                && intersect2_cardinality(s, false, &splits[i - 1], false) > 0
83            {
84                splits0.insert(i);
85            }
86            // splits that cut A (side 1)
87            if intersect2_cardinality(s, true, &splits[i - 1], true) > 0
88                && intersect2_cardinality(s, true, &splits[i - 1], false) > 0
89            {
90                splits1.insert(i);
91            }
92        }
93
94        // Pick one start node from each side by taxon membership
95        let (mut start0, mut start1) = (None, None);
96        for t in 1..=n_tax {
97            if !part_a.contains(t) && start0.is_none() {
98                start0 = graph.base.get_taxon_node(t);
99            }
100            if part_a.contains(t) && start1.is_none() {
101                start1 = graph.base.get_taxon_node(t);
102            }
103            if start0.is_some() && start1.is_some() {
104                break;
105            }
106        }
107        let start0 = start0.ok_or_else(|| anyhow!("ConvexHull: missing start node for side B"))?;
108        let start1 = start1.ok_or_else(|| anyhow!("ConvexHull: missing start node for side A"))?;
109
110        hulls.insert(start0, 0);
111        if start0 == start1 {
112            hulls.insert(start1, 2);
113            intersections.push(start1);
114        } else {
115            hulls.insert(start1, 1);
116        }
117
118        // Grow both hulls via DFS over allowed splits, using rotation order
119        convex_hull_path(graph, start0, &mut hulls, &splits0, &mut intersections, 0);
120        convex_hull_path(graph, start1, &mut hulls, &splits1, &mut intersections, 1);
121
122        // Make intersection list unique (can be discovered from both sides)
123        {
124            let mut seen: HashSet<NodeIndex> = HashSet::new();
125            intersections.retain(|v| seen.insert(*v));
126        }
127
128        // Duplicate each intersection node: add v1--v carrying split sid
129        for &v in &intersections {
130            let v1 = graph.base.new_node();
131
132            // Place the hook edge deterministically: insert at `v` AFTER its first adjacent edge (if any)
133            let e_hook = if let Some(f0) = graph.first_adjacent_edge(v) {
134                graph.new_edge_after(v1, v, f0)?
135            } else {
136                graph.new_edge(v1, v)?
137            };
138            graph.set_split(e_hook, sid as i32);
139            graph.base.set_weight(e_hook, s.weight);
140            graph.base.set_edge_label(e_hook, sid.to_string());
141
142            // Move taxa in A to v1; others stay on v
143            let taxa_old = graph.base.get_node_taxon(v).unwrap_or(&[]);
144            let list = taxa_old.to_vec();
145            graph.base.clear_taxa_for_node(v);
146            for taxon in list {
147                if part_a.contains(taxon) {
148                    graph.base.add_taxon(v1, taxon);
149                } else {
150                    graph.base.add_taxon(v, taxon);
151                }
152            }
153        }
154
155        // Rewire edges around each intersection node v, preserving rotation:
156        // - For neighbor w marked side 1 (A), move v--w to v1--w:
157        //   insert the new edge on w's rotation AFTER `consider` then delete `consider`.
158        for &v in &intersections {
159            let e_to_v1 = find_edge_with_split(graph, v, sid).expect("duplicate hook missing");
160            let v1 = graph.base.get_opposite(v, e_to_v1);
161
162            // snapshot current rotation at v to avoid iterator invalidation
163            let adj_v: Vec<EdgeIndex> = graph.rotation(v).to_vec();
164            for consider in adj_v {
165                if consider == e_to_v1 {
166                    continue;
167                }
168                let w = graph.base.get_opposite(v, consider);
169                let mark = *hulls.get(&w).unwrap_or(&-1);
170
171                if mark == 1 {
172                    // Move consider to connect v1--w. Preserve w’s local order by placing AFTER `consider`.
173                    let consider_dup = graph.new_edge_after(v1, w, consider)?;
174                    // copy attributes (split/weight/label)
175                    let sid_old = graph.get_split(consider);
176                    if sid_old != 0 {
177                        graph.set_split(consider_dup, sid_old);
178                    }
179                    let wgt = graph.base.weight(consider);
180                    if wgt != crate::phylo::phylo_graph::DEFAULT_WEIGHT {
181                        graph.base.set_weight(consider_dup, wgt);
182                    }
183                    if let Some(lbl) = graph.base.edge_label(consider).map(|s| s.to_string()) {
184                        graph.base.set_edge_label(consider_dup, lbl);
185                    }
186                    // delete original edge; rotation is updated by wrapper
187                    graph.remove_edge(consider);
188                } else if mark == 2 {
189                    // w is also an intersection: connect v1--w1 if not present
190                    let e_w_to_w1 =
191                        find_edge_with_split(graph, w, sid).expect("peer duplicate missing");
192                    let w1 = graph.base.get_opposite(w, e_w_to_w1);
193
194                    if graph.base.graph.find_edge(v1, w1).is_none() {
195                        // Place on w1 side AFTER e_w_to_w1 for stability
196                        let e_new = graph.new_edge_after(v1, w1, e_w_to_w1)?;
197                        let sid_old = graph.get_split(consider);
198                        if sid_old != 0 {
199                            graph.set_split(e_new, sid_old);
200                        }
201                        let wgt = graph.base.weight(consider);
202                        if wgt != crate::phylo::phylo_graph::DEFAULT_WEIGHT {
203                            graph.base.set_weight(e_new, wgt);
204                        }
205                        if let Some(lbl) = graph.base.edge_label(consider).map(|s| s.to_string()) {
206                            graph.base.set_edge_label(e_new, lbl);
207                        }
208                    }
209                }
210            }
211        }
212
213        used_splits.insert(sid);
214    }
215
216    // Clear node labels, then label leaves from taxa
217    for v in graph.base.graph.node_indices().collect::<Vec<_>>() {
218        graph.base.set_node_label(v, "");
219    }
220    add_leaf_labels_from_taxa(graph, taxa_labels);
221
222    // Optional edge label trick (first edge per split) — then clear (matches Java behavior)
223    let edges = graph.base.graph.edge_indices().collect::<Vec<_>>();
224    {
225        let mut seen = FixedBitSet::with_capacity(splits.len() + 1);
226        for &e in &edges {
227            let s = graph.get_split(e);
228            if s > 0 && !seen.contains(s as usize) {
229                seen.insert(s as usize);
230                graph.base.set_edge_label(e, s.to_string());
231            } else {
232                graph.base.set_edge_label(e, "");
233            }
234        }
235    }
236    for e in edges {
237        graph.base.set_edge_label(e, "");
238    }
239
240    Ok(())
241}
242
243/* ---------------- rotation-aware helpers ---------------- */
244
245/// Grow one hull from `start` over edges whose split ids are in `allowed_splits`.
246/// Traversal order follows the node’s rotation for determinism.
247fn convex_hull_path(
248    g: &mut PhyloSplitsGraph,
249    start: NodeIndex,
250    hulls: &mut HashMap<NodeIndex, i32>,
251    allowed_splits: &FixedBitSet, // 1-based split ids
252    intersections: &mut Vec<NodeIndex>,
253    side: i32, // 0 (B) or 1 (A)
254) {
255    let mut seen_edges: HashSet<EdgeIndex> = HashSet::new();
256    let mut stack: Vec<NodeIndex> = vec![start];
257
258    while let Some(v) = stack.pop() {
259        // iterate incident edges in rotation order
260        for &f in g.rotation(v) {
261            if seen_edges.contains(&f) {
262                continue;
263            }
264            let sid = g.get_split(f);
265            if sid <= 0 || !allowed_splits.contains(sid as usize) {
266                continue;
267            }
268
269            seen_edges.insert(f);
270            let w = g.base.get_opposite(v, f);
271
272            match hulls.get(&w).copied() {
273                None => {
274                    hulls.insert(w, side);
275                    stack.push(w);
276                }
277                Some(mark) if mark == (1 - side) => {
278                    if *hulls.entry(w).or_insert(2) != 2 {
279                        hulls.insert(w, 2);
280                    }
281                    intersections.push(w);
282                    stack.push(w);
283                }
284                _ => { /* already on this hull or intersection */ }
285            }
286        }
287    }
288}
289
290/// Find the (unique) edge incident to `v` carrying split id `split_id` (use rotation for determinism).
291fn find_edge_with_split(g: &PhyloSplitsGraph, v: NodeIndex, split_id: usize) -> Option<EdgeIndex> {
292    for &e in g.rotation(v) {
293        if g.get_split(e) == split_id as i32 {
294            return Some(e);
295        }
296    }
297    None
298}
299
300/// Order splits by increasing size then id, skipping already-used ones.
301fn get_order_to_process_splits_in(splits: &[ASplit], used: &FixedBitSet) -> Vec<usize> {
302    let mut items: Vec<(usize, usize)> = Vec::with_capacity(splits.len());
303    for s in 1..=splits.len() {
304        if !used.contains(s) {
305            items.push((splits[s - 1].size(), s));
306        }
307    }
308    items.sort_unstable();
309    items.into_iter().map(|(_, s)| s).collect()
310}
311
312/// Intersection cardinality of chosen sides: if `side_a=true` use A-part else B-part.
313fn intersect2_cardinality(a: &ASplit, side_a: bool, b: &ASplit, side_b: bool) -> usize {
314    let pa = if side_a { a.get_a() } else { a.get_b() };
315    let pb = if side_b { b.get_a() } else { b.get_b() };
316
317    let (small, large) = if pa.count_ones(..) <= pb.count_ones(..) {
318        (pa, pb)
319    } else {
320        (pb, pa)
321    };
322    small.ones().filter(|&i| large.contains(i)).count()
323}
324
325/// Assign node labels for leaves (degree==1) if exactly one taxon maps to that node.
326fn add_leaf_labels_from_taxa(g: &mut PhyloSplitsGraph, taxa_labels: &[String]) {
327    let base: &mut PhyloGraph = &mut g.base;
328    for v in base.graph.node_indices().collect::<Vec<_>>() {
329        if base.graph.neighbors(v).count() == 1 {
330            if let Some(n2t) = base.node2taxa() {
331                if let Some(list) = n2t.get(&v) {
332                    if list.len() == 1 {
333                        let t = list[0];
334                        if (1..=taxa_labels.len()).contains(&t) {
335                            base.set_node_label(v, &taxa_labels[t - 1]);
336                        }
337                    }
338                }
339            }
340        }
341    }
342}
343
344/* ---------------- tiny access shim ---------------- */
345
346/// Expose rotation slice for iteration (read-only). Add this to `impl PhyloSplitsGraph`.
347trait RotationAccess {
348    fn rotation(&self, v: NodeIndex) -> &[EdgeIndex];
349}
350impl RotationAccess for PhyloSplitsGraph {
351    #[inline]
352    fn rotation(&self, v: NodeIndex) -> &[EdgeIndex] {
353        self.rot(v)
354    }
355}