walky/solvers/approximate/
matching.rs

1//! This module contains algorithms to approximate
2//! a minimum-weight exact matching.
3//!
4//! This is used in [`crate::solvers::approximate::christofides`].
5#[cfg(feature = "mpi")]
6use std::{
7    mem::transmute,
8    slice::{from_raw_parts, from_raw_parts_mut},
9};
10
11use rand::{seq::SliceRandom, thread_rng};
12use rayon::prelude::*;
13
14#[cfg(feature = "mpi")]
15use mpi::collective::Root;
16#[cfg(feature = "mpi")]
17use mpi::topology::*;
18#[cfg(feature = "mpi")]
19use nalgebra::{Dyn, Matrix};
20
21use crate::{computation_mode::*, datastructures::NAMatrix};
22
23fn randomized_matching(vertices: &mut [usize]) {
24    vertices.shuffle(&mut thread_rng())
25}
26
27/// constructs a randomly selected matching and
28/// does `tries` many local-search optimizations
29/// on randomly selected pairs of edges
30pub fn approx_min_cost_matching<const MODE: usize>(
31    graph: &NAMatrix,
32    mut vertices: Vec<usize>,
33    tries: usize,
34) -> Vec<[usize; 2]> {
35    randomized_matching(&mut vertices);
36    let mut matching: Vec<[usize; 2]> = vertices
37        .chunks_exact(2)
38        .map(|chunk| [chunk[0], chunk[1]])
39        .collect();
40
41    match MODE {
42        SEQ_COMPUTATION => improve_matching(graph, matching.as_mut_slice(), tries),
43        PAR_COMPUTATION => par_improve_matching(graph, matching.as_mut_slice(), tries),
44        #[cfg(feature = "mpi")]
45        MPI_COMPUTATION => {
46            if let Some(universe) = mpi::initialize() {
47                // do not drop the universe until the call to mpi_improve_matching has finished!
48                let world = universe.world();
49                mpi_improve_matching(graph, matching.as_mut_slice(), tries, world)
50            } else {
51                let world = SystemCommunicator::world();
52                mpi_improve_matching(graph, matching.as_mut_slice(), tries, world)
53            }
54        }
55        _ => panic_on_invaid_mode::<MODE>(),
56    }
57    matching
58}
59
60// `graph`: a reference to the underlying graph
61//
62// `matching`: a mutable reference to a matching of vertices in the graph. Needs to satisfy
63// `matching.len() >= 2`
64//
65// `tries`: the number of repetitions of the following procedure:
66//
67// randomly choose two distinct pairs of edges:
68// given the situation:
69//
70// 0 -- 1
71//
72// 2 -- 3
73//
74// the function evaluates this two alternative matchings:
75//
76// 0    1
77// |    |
78// 2    3
79//
80// and
81//
82// 0     1
83//  \   /
84//   \ /
85//    X
86//   / \
87//  /   \
88// 2     3
89fn improve_matching(graph: &NAMatrix, matching: &mut [[usize; 2]], tries: usize) {
90    // cannot improve a matching with 0 or 1 edge
91    if matching.len() <= 1 {
92        return;
93    }
94
95    let mut rng = thread_rng();
96
97    for _ in 0..tries {
98        matching.shuffle(&mut rng);
99
100        // look at consecutive pairs of edges in the matching
101        // (ignore the last single edge in a matching of odd number of edges)
102        for chunk in matching.chunks_exact_mut(2) {
103            // safety: the pointers point to valid memory in the matching,
104            // and are non-overlapping.
105            // Here unsafe code is used to create two mutable references
106            // to two distinct cells of the same array
107            let edg01 = unsafe { chunk.as_mut_ptr().offset(0).as_mut().unwrap() };
108            let edg23 = unsafe { chunk.as_mut_ptr().offset(1).as_mut().unwrap() };
109
110            optimize_two_matching(edg01, edg23, graph);
111        }
112    }
113}
114
115// given the situation:
116//
117// 0 -- 1 (i.e. `edg01`)
118//
119// 2 -- 3 (i.e. `edg23`)
120//
121// the function evaluates this two alternative matchings:
122//
123// 0    1
124// |    |
125// 2    3
126//
127// and
128//
129// 0     1
130//  \   /
131//   \ /
132//    X
133//   / \
134//  /   \
135// 2     3
136#[inline]
137fn optimize_two_matching(edg01: &mut [usize; 2], edg23: &mut [usize; 2], graph: &NAMatrix) {
138    let v0 = edg01[0];
139    let v1 = edg01[1];
140    let v2 = edg23[0];
141    let v3 = edg23[1];
142
143    // base case cost
144    let cost_edg01 = graph[(v0, v1)];
145    let cost_edg23 = graph[(v2, v3)];
146    let cost_0 = cost_edg01 + cost_edg23;
147
148    // case 1
149    let cost_edg02 = graph[(v0, v2)];
150    let cost_edg13 = graph[(v1, v3)];
151    let cost_1 = cost_edg02 + cost_edg13;
152
153    // case 2
154    let cost_edg03 = graph[(v0, v3)];
155    let cost_edg12 = graph[(v1, v2)];
156    let cost_2 = cost_edg03 + cost_edg12;
157
158    // safety: the pointers point to valid memory in the matching,
159    // and they point to non-overlapping memory,
160    // hence no write-conflicts can occour
161    if cost_1 < cost_0 {
162        if cost_2 < cost_1 {
163            // case 2 is optimal
164            *edg01 = [v0, v2];
165            *edg23 = [v1, v2];
166        } else {
167            // case 1 is optimal
168            *edg01 = [v0, v2];
169            *edg23 = [v1, v3];
170        }
171    } else {
172        // cost_1 >= cost_0
173        if cost_2 < cost_0 {
174            // case 2 is optimal
175            *edg01 = [v0, v3];
176            *edg23 = [v1, v2];
177        } // else base case is optimal
178    }
179}
180
181/// parallelized version of [`improve_matching`]
182fn par_improve_matching(graph: &NAMatrix, matching: &mut [[usize; 2]], tries: usize) {
183    // step 1: generate pairs of edges, by shuffling the matching and taking chunks of size 2 as
184    // pairs of edges
185    let mut rng = thread_rng();
186
187    for _ in 0..tries {
188        matching.shuffle(&mut rng);
189
190        // look at consecutive pairs of edges in the matching
191        // (ignore the last single edge in a matching of odd number of edges)
192        matching
193            .par_chunks_exact_mut(2)
194            .with_min_len(1000)
195            .for_each(|chunk| {
196                // safety: the pointers point to valid memory in the matching,
197                // and are non-overlapping.
198                // Here unsafe code is used to create two mutable references
199                // to two distinct cells of the same array
200                let edg01 = unsafe { chunk.as_mut_ptr().offset(0).as_mut().unwrap() };
201                let edg23 = unsafe { chunk.as_mut_ptr().offset(1).as_mut().unwrap() };
202
203                optimize_two_matching(edg01, edg23, graph);
204            });
205    }
206}
207
208#[cfg(feature = "mpi")]
209pub fn mpi_improve_matching<C: Communicator>(
210    graph: &NAMatrix,
211    matching: &mut [[usize; 2]],
212    tries: usize,
213    world: C,
214) {
215    use mpi::traits::*;
216
217    let size = world.size();
218    let rank = world.rank();
219    let root_process = world.process_at_rank(crate::ROOT_RANK);
220    const COST_TAG: mpi::Tag = 0;
221    const MATCHING_TAG: mpi::Tag = 1;
222
223    let mut tries_copy = tries;
224    if rank == crate::ROOT_RANK {
225        bootstrap_mpi_matching_calc(&root_process, matching, rank, &mut tries_copy, graph);
226    }
227
228    // for every node / process:
229    // try a different matching and randomized improvement
230    //improve_matching(graph, matching, tries);
231    improve_matching(graph, matching, tries);
232    let cost: f64 = matching.iter().map(|&[i, j]| graph[(i, j)]).sum();
233    let mut min_cost = cost;
234
235    let mut best_matching_singletons: Vec<usize> = matching.iter().flat_map(|&edg| edg).collect();
236
237    if rank != crate::ROOT_RANK {
238        root_process.send_with_tag(&cost, COST_TAG);
239        // create a mutable slice over the data of `matching`.
240        // If matching is a slice of length n and type `&mut [[usize;2]]`,
241        // then matching_singletons is a slice of length 2n and type `&mut [usize]`
242        // safety: the memory is valid
243        let matching_singletons =
244            unsafe { from_raw_parts(&matching[0][0] as *const usize, matching.len() * 2) };
245        root_process.send_with_tag(matching_singletons, MATCHING_TAG);
246    } else {
247        // if rank == crate::ROOT_RANK
248
249        // the matchings will be sent as a Vec<usize>, because [usize; 2] toes not implement
250        // the trait mpi::Equivalence
251
252        // find the best solution across all nodes
253        for rk in 1..size {
254            let (other_cost, _status_cost) =
255                world.process_at_rank(rk).receive_with_tag::<f64>(COST_TAG);
256            let (msg, _status) = world
257                .process_at_rank(rk)
258                .receive_vec_with_tag::<usize>(MATCHING_TAG);
259            if other_cost < min_cost {
260                min_cost = other_cost;
261                best_matching_singletons = msg;
262            }
263        }
264    }
265
266    // broadcast best solution from the root_node to all nodes
267    root_process.broadcast_into(&mut min_cost);
268    root_process.broadcast_into(&mut best_matching_singletons);
269
270    // copy the matching data from `matching_singletons` back into `matching`.
271    //
272    // If `matching_singletons` has the following content:
273    // `[0, 1, 2, 3, ...]` then the vertices will be grouped into edges like this:
274    // `[[0,1], [2,3], ...]`.
275    matching.copy_from_slice(unsafe {
276        from_raw_parts(
277            transmute::<*const usize, *const [usize; 2]>(best_matching_singletons.as_ptr()),
278            matching.len(),
279        )
280    });
281}
282
283/// Distribute the initial matching
284/// from the root_node to all other processes.
285///
286/// `root_process`: handle of the root process
287///
288/// `matching`: If called on the root process: should be the matching.
289/// If called from any other process: can be chosen arbitrarily, as it will be overwritten from the
290/// root process
291///
292/// `rank`: rank of the current process
293///
294/// `tries`: If called on the root process: should contain the number of tries.
295/// If called from any other process: can be chosen arbitrarily, as it will be overwritten from the
296/// root process
297///
298/// `graph`: If called on the root process: should reference the graph.
299/// If called from any other process: can be chosen arbitrarily.
300#[cfg(feature = "mpi")]
301pub fn bootstrap_mpi_matching_calc<C: Communicator>(
302    root_process: &Process<C>,
303    matching: &mut [[usize; 2]],
304    rank: Rank,
305    tries: &mut usize,
306    graph: &NAMatrix,
307) -> (Vec<[usize; 2]>, NAMatrix) {
308    // broadcast tries from the root node to all processes
309
310    root_process.broadcast_into(tries);
311
312    let mut matching_size = matching.len();
313    broadcast_matching_size(root_process, &mut matching_size);
314
315    // broadcast the NAMatrix dim from the root process to all processes
316    use crate::datastructures::AdjacencyMatrix;
317    let mut dim = graph.dim();
318    broadcast_dim(root_process, &mut dim);
319
320    // create storage for the incoming matching
321    let mut matching_vec = vec![[0usize; 2]; matching_size];
322    if rank == crate::ROOT_RANK {
323        matching_vec.copy_from_slice(matching);
324    }
325    broadcast_matching(root_process, &mut matching_vec);
326
327    // broadcast the NAMatrix from the root process to all processes
328    let mut matrix_vec = vec![0.; dim * dim];
329    if rank == crate::ROOT_RANK {
330        matrix_vec.copy_from_slice(graph.data.as_vec())
331    }
332    broadcast_matrix_vec(root_process, &mut matrix_vec);
333    let matrix = NAMatrix(Matrix::from_vec_generic(Dyn(dim), Dyn(dim), matrix_vec));
334
335    (matching_vec, matrix)
336}
337
338/// broadcast the size of the matching to all processes
339#[cfg(feature = "mpi")]
340fn broadcast_matching_size<C: Communicator>(root_process: &Process<C>, matching_size: &mut usize) {
341    root_process.broadcast_into(matching_size);
342}
343
344#[cfg(feature = "mpi")]
345fn broadcast_dim<C: Communicator>(root_process: &Process<C>, dim: &mut usize) {
346    root_process.broadcast_into(dim);
347}
348
349#[cfg(feature = "mpi")]
350fn broadcast_matching<C: Communicator>(root_process: &Process<C>, matching: &mut Vec<[usize; 2]>) {
351    // cast the matching slice type from `&mut [[usize;2]]` to `&mut [usize]`
352    // to be able to send it via MPI
353    //let matching_singletons = if rank == 0 {
354    //    unsafe { from_raw_parts_mut(&mut matching[0][0] as *mut usize, matching.len() * 2) }
355    //} else {
356    //    unsafe { from_raw_parts_mut(&mut matching_vec[0][0] as *mut usize, matching.len() * 2) }
357    //};
358    let matching_singletons =
359        unsafe { from_raw_parts_mut(&mut matching[0][0] as *mut usize, matching.len() * 2) };
360    // distribute the matching from the root process to all other processes
361    root_process.broadcast_into(matching_singletons);
362}
363
364#[cfg(feature = "mpi")]
365fn broadcast_matrix_vec<C: Communicator>(root_process: &Process<C>, matrix_vec: &mut Vec<f64>) {
366    root_process.broadcast_into(matrix_vec);
367}