oxygraph/
bipartite.rs

1//! A bipartite graph is one where there are nodes
2//! of two strata (or colours). Edges can form *between*
3//! these two strata, but not *within*.
4//!
5//! In `oxygraph`, the bipartite graph is implemented
6//! as a wrapper over a `petgraph::Graph`, and there are
7//! no enforcements to maintain bipartite-ness. This is down
8//! to the user. A bipartite graph can be checked, once created,
9//! with the `is_bipartite` method.
10
11use calm_io::*;
12use csv::ReaderBuilder;
13use ordered_float::OrderedFloat;
14use petgraph::{
15    graph::NodeIndex,
16    visit::{EdgeRef, IntoNodeReferences},
17    Direction::{self, Incoming, Outgoing},
18    Graph,
19};
20use rand::{self, seq::IndexedRandom};
21use serde_derive::Deserialize;
22use std::collections::{BTreeMap, HashMap};
23use std::path::PathBuf;
24use thiserror::Error;
25
26use crate::{scale_fit, MARGIN_LR};
27
28/// Error type for reading in a DSV.
29#[derive(Error, Debug)]
30pub enum ReadDSVError {
31    #[error("Problem reading from path.")]
32    FromPath { source: csv::Error },
33    #[error("Problem with StringRecord: {source}")]
34    StringRecordParseError { source: csv::Error },
35    #[error("Problem with graph creation")]
36    GraphCreationError { source: DSVError },
37}
38
39/// Error type for generating a random graph.
40#[derive(Error, Debug)]
41pub enum RandomError {
42    #[error("More edges than is possible for a bipartite graph ({0}).")]
43    MaxEdges(usize),
44    #[error("Number of nodes for a graph must be non-zero.")]
45    NoNodes,
46}
47
48/// Errors that may occur while reading a delimited-separated values (DSV) file.
49#[derive(Error, Debug)]
50pub enum DSVError {
51    #[error("No weight.")]
52    Weight,
53    #[error("No source node.")]
54    Source,
55    #[error("No target node.")]
56    Target,
57    #[error("Node appears in both parasites and hosts - i.e. not a bipartite graph.")]
58    NotBipartite,
59}
60
61/// A row in the DSV should only be these three columns currently.
62///
63/// * `from` - Actor species (parasite/parasitoid/higher trophic level).
64/// * `to` - Recipient species (host/lower trophic level).
65/// * `weight` - The weight of the edge between the two species.
66#[derive(Debug, Deserialize, PartialEq)]
67pub struct Row {
68    /// The actor species (parasite/parasitoid/higher trophic level).
69    pub from: String,
70    /// The recipient species (host/lower trophic level).
71    pub to: String,
72    /// Weights between actor/recipient can only be floats
73    /// at the moment. But could be more complex in the future.
74    /// These are added to the *nodes*.
75    pub weight: f64,
76}
77
78/// Represents the two partitions in a bipartite graph:
79/// - `Hosts` (lower level, typically recipients)
80/// - `Parasites` (higher level, typically actors)
81#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
82pub enum Partition {
83    Hosts,
84    Parasites,
85}
86
87impl std::fmt::Display for Partition {
88    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
89        match self {
90            Partition::Hosts => write!(f, "Hosts"),
91            Partition::Parasites => write!(f, "Parasites"),
92        }
93    }
94}
95
96/// Represents a species node in the bipartite graph.
97///
98/// # Fields
99/// * `name` - The species name.
100/// * `partition` - Whether it's a host or parasite.
101#[derive(Debug, Clone)]
102pub struct SpeciesNode {
103    pub name: String,
104    pub partition: Partition,
105}
106
107impl SpeciesNode {
108    /// Create a new node
109    pub fn new(name: String, partition: Partition) -> Self {
110        Self { name, partition }
111    }
112
113    /// Convert a `SpeciesNode` to a species name.
114    pub fn to_species(&self) -> String {
115        self.name.clone()
116    }
117}
118
119impl std::fmt::Display for SpeciesNode {
120    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
121        // ignore partition for now
122        write!(f, "{}", self.name)
123    }
124}
125
126/// Fitness is currently an `f64`.
127pub type Fitness = f64;
128
129/// Wrapper for a petgraph `Graph`, representing a bipartite network.
130///
131/// Nodes are `SpeciesNode`, and edges are weighted with `Fitness`.
132#[derive(Debug, Clone)]
133pub struct BipartiteGraph(pub Graph<SpeciesNode, Fitness>);
134
135/// Output from bipartiteness checking.
136/// * `Yes` contains a color map for nodes.
137/// * `No` means the graph is not bipartite.
138#[derive(Debug)]
139pub enum Strata {
140    /// If there are strata present, return these
141    /// as a map of node indices &
142    Yes(HashMap<NodeIndex, bool>),
143    /// There weren't any strata. This isn't a
144    /// bipartite graph! And what are the offending nodes?
145    No,
146}
147
148/// A structure to hold the output of the
149/// bipartite graph statistics.
150#[derive(Debug)]
151pub struct BipartiteStats {
152    /// The number of parasites in the graph.
153    pub no_parasites: usize,
154    /// The number of hosts in the graph.
155    pub no_hosts: usize,
156    /// The number of edges in the graph.
157    pub no_edges: usize,
158}
159
160impl BipartiteGraph {
161    /// Checks whether the graph is bipartite.
162    ///
163    /// This uses a two-coloring algorithm to verify that no two adjacent nodes
164    /// share the same partition. If the graph is bipartite, returns `Strata::Yes`
165    /// with a map of node indices and their assigned color (bool).
166    /// If not bipartite, returns `Strata::No`.
167    ///
168    /// # Returns
169    /// * `Strata::Yes(map)` if the graph is bipartite.
170    /// * `Strata::No` if the graph is not bipartite.
171    pub fn is_bipartite(&self) -> Strata {
172        // create a map to store the colours
173        let mut colour_map: HashMap<NodeIndex, bool> = HashMap::new();
174        // iterate over all the nodes
175        // ignoring the weights.
176        for (node, _) in self.0.node_references() {
177            // does the map contain the node?
178            let contains_node = colour_map.contains_key(&node);
179            // now get the neighbours of this node.
180
181            let no_neighbours = self.0.neighbors_undirected(node).count();
182
183            if contains_node || no_neighbours == 0 {
184                continue;
185            }
186
187            // make a queue
188            let mut queue = vec![node];
189            colour_map.insert(node, true);
190
191            while !queue.is_empty() {
192                let v = queue.pop().unwrap();
193                let c = !colour_map.get(&v).unwrap();
194                let inner_neighbours: Vec<NodeIndex> = self.0.neighbors_undirected(v).collect();
195                for w in &inner_neighbours {
196                    let contains_node_inner = colour_map.contains_key(w);
197                    if contains_node_inner {
198                        if colour_map.get(w).unwrap() == colour_map.get(&v).unwrap() {
199                            return Strata::No;
200                        }
201                    } else {
202                        colour_map.insert(*w, c);
203                        queue.push(*w);
204                    }
205                }
206            }
207        }
208
209        Strata::Yes(colour_map)
210    }
211
212    /// Generates a random bipartite graph with a given number of parasites,
213    /// hosts, and edges.
214    ///
215    /// # Arguments
216    /// * `parasite_no` - Number of parasite nodes.
217    /// * `host_no` - Number of host nodes.
218    /// * `edge_no` - Number of edges between parasites and hosts.
219    ///
220    /// # Returns
221    /// * `Ok(BipartiteGraph)` if successful.
222    /// * `Err(RandomError)` if node counts are zero or edges exceed the maximum possible.
223    pub fn random(parasite_no: usize, host_no: usize, edge_no: usize) -> Result<Self, RandomError> {
224        // must be greater than no nodes.
225        if parasite_no == 0 || host_no == 0 {
226            return Err(RandomError::NoNodes);
227        }
228
229        let max_edges = parasite_no * host_no;
230        if edge_no > max_edges {
231            return Err(RandomError::MaxEdges(max_edges));
232        }
233        // so we make a new bipartite graph with the number of nodes =
234        // parasite_no + host_no
235        // then get random node from parasites
236        // and random node from hosts
237        // then add while edge count is less than edge_no
238        let mut graph: Graph<SpeciesNode, Fitness> = Graph::new();
239
240        let mut p_node_indices = Vec::new();
241        // add the parasite node indices to the graph
242        for _ in 0..parasite_no {
243            let spnode = SpeciesNode::new("".into(), Partition::Parasites);
244            let nidx = graph.add_node(spnode);
245            p_node_indices.push(nidx);
246        }
247
248        let mut h_node_indices = Vec::new();
249        // add the host node indices to the graph
250        for _ in 0..host_no {
251            let spnode = SpeciesNode::new("".into(), Partition::Hosts);
252            let nidx = graph.add_node(spnode);
253            h_node_indices.push(nidx);
254        }
255
256        let mut edge_count = 0;
257
258        while edge_count < edge_no {
259            // guarantee these slices are non-empty pls.
260            let p = *p_node_indices.choose(&mut rand::thread_rng()).unwrap();
261            let h = *h_node_indices.choose(&mut rand::thread_rng()).unwrap();
262
263            // check if this edge already exists.
264            if graph.contains_edge(p, h) {
265                continue;
266            }
267            graph.add_edge(p, h, 0.0);
268            edge_count += 1;
269        }
270
271        Ok(BipartiteGraph(graph))
272    }
273
274    /// Returns simple statistics on the bipartite graph: counts of parasites,
275    /// hosts, and edges.
276    ///
277    /// # Returns
278    /// `BipartiteStats` struct with parasite count, host count, and edge count.
279    pub fn stats(&self) -> BipartiteStats {
280        let (parasites, hosts) = &self.get_parasite_host_from_graph();
281
282        let no_parasites = parasites.len();
283        let no_hosts = hosts.len();
284
285        let no_edges = &self.0.edge_count();
286
287        BipartiteStats {
288            no_parasites,
289            no_hosts,
290            no_edges: *no_edges,
291        }
292    }
293
294    /// Reads a bipartite graph from a delimited file.
295    ///
296    /// The file should contain three columns: `from`, `to`, and `weight`.
297    /// Any delimiter can be specified (e.g., comma, tab).
298    ///
299    /// # Arguments
300    /// * `input` - Path to the DSV file.
301    /// * `delimiter` - Delimiter as a byte, e.g. `b'\t'` for TSV.
302    ///
303    /// # Returns
304    /// `Ok(BipartiteGraph)` on success, or `ReadDSVError` on failure.
305    pub fn from_dsv(input: &PathBuf, delimiter: u8) -> Result<Self, ReadDSVError> {
306        let mut rdr = ReaderBuilder::new()
307            .delimiter(delimiter)
308            .from_path(input)
309            .map_err(|s| ReadDSVError::FromPath { source: s })?;
310
311        let mut edges = Vec::new();
312
313        for result in rdr.deserialize() {
314            let record: Row =
315                result.map_err(|s| ReadDSVError::StringRecordParseError { source: s })?;
316            edges.push(record);
317        }
318        Self::create_graph_from_dsv(edges)
319            .map_err(|e| ReadDSVError::GraphCreationError { source: e })
320    }
321
322    /// Internal function to create a graph from DSV rows.
323    ///
324    /// The DSV must represent a valid bipartite network: "from" nodes
325    /// are treated as parasites, and "to" nodes as hosts. Nodes cannot
326    /// belong to both strata.
327    ///
328    /// # Returns
329    /// `Ok(BipartiteGraph)` if valid, otherwise `DSVError`.
330    fn create_graph_from_dsv(input: Vec<Row>) -> Result<BipartiteGraph, DSVError> {
331        use std::collections::{HashMap, HashSet};
332
333        // Step 1: Collect unique node names and their partitions
334        let mut parasite_nodes: HashSet<&String> = HashSet::new();
335        let mut host_nodes: HashSet<&String> = HashSet::new();
336
337        for row in &input {
338            parasite_nodes.insert(&row.from);
339            host_nodes.insert(&row.to);
340        }
341
342        // Step 2: Build list of all nodes
343        let mut all_nodes: HashSet<&String> = parasite_nodes.union(&host_nodes).copied().collect();
344
345        // Step 3: Create the graph
346        let mut graph: Graph<SpeciesNode, Fitness> = Graph::new();
347        let mut node_index_map: HashMap<String, NodeIndex> = HashMap::new();
348
349        // Step 4: Add nodes with SpeciesNode (name + partition)
350        for node_name in all_nodes.drain() {
351            let partition = if parasite_nodes.contains(node_name) && host_nodes.contains(node_name)
352            {
353                // we can't have a node in both partitions
354                return Err(DSVError::NotBipartite);
355            } else if parasite_nodes.contains(node_name) {
356                Partition::Parasites
357            } else {
358                Partition::Hosts
359            };
360
361            let species_node = SpeciesNode {
362                name: node_name.clone(),
363                partition,
364            };
365
366            let node_index = graph.add_node(species_node);
367            node_index_map.insert(node_name.clone(), node_index);
368        }
369
370        // Step 5: Add edges
371        for Row { from, to, weight } in input {
372            let from_node_index = node_index_map.get(&from).unwrap();
373            let to_node_index = node_index_map.get(&to).unwrap();
374
375            graph.add_edge(*from_node_index, *to_node_index, weight);
376        }
377
378        // Return wrapped graph
379        Ok(BipartiteGraph(graph))
380    }
381
382    /// Extracts parasite and host nodes from the bipartite graph.
383    ///
384    /// # Returns
385    /// A tuple with two vectors:
386    /// * Parasites: Vec of `(NodeIndex, &SpeciesNode)`
387    /// * Hosts: Vec of `(NodeIndex, &SpeciesNode)`
388    pub fn get_parasite_host_from_graph(
389        &self,
390    ) -> (
391        Vec<(NodeIndex, &SpeciesNode)>,
392        Vec<(NodeIndex, &SpeciesNode)>,
393    ) {
394        let graph = &self.0;
395
396        let hosts: Vec<_> = graph
397            .node_references()
398            .filter(|(_, node)| node.partition == Partition::Hosts)
399            .collect();
400
401        let parasites: Vec<_> = graph
402            .node_references()
403            .filter(|(_, node)| node.partition == Partition::Parasites)
404            .collect();
405
406        (parasites, hosts)
407    }
408
409    /// Returns the degree (number of connections) of a single node.
410    ///
411    /// # Arguments
412    /// * `node` - NodeIndex of the node.
413    ///
414    /// # Returns
415    /// Degree (usize).
416    pub fn node_degree(&self, node: NodeIndex) -> usize {
417        let graph = &self.0;
418        graph.edges_directed(node, Direction::Incoming).count()
419            + graph.edges_directed(node, Direction::Outgoing).count()
420    }
421
422    /// Returns a list of degrees for nodes in the graph.
423    ///
424    /// # Arguments
425    /// * `partition` - Optional filter by `Partition::Hosts` or `Partition::Parasites`.
426    ///
427    /// # Returns
428    /// Vector of tuples: `(node name, partition, degree)`.
429    pub fn degrees(&self, partition: Option<Partition>) -> Vec<(String, Partition, usize)> {
430        let graph = &self.0;
431
432        graph
433            .node_references()
434            .filter(|(_, node)| partition.map_or(true, |p| node.partition == p))
435            .map(|(node_index, node)| {
436                (
437                    node.name.clone(),
438                    node.partition,
439                    self.node_degree(node_index),
440                )
441            })
442            .collect()
443    }
444
445    /// Returns the weighted degree (also known as strength) of a single node.
446    ///
447    /// # Arguments
448    /// * `node` - NodeIndex of the node.
449    ///
450    /// # Returns
451    /// Weighted degree (sum of edge weights).
452    pub fn weighted_node_degree(&self, node: NodeIndex) -> f64 {
453        let graph = &self.0;
454
455        graph.edges(node).map(|e| *e.weight()).sum()
456    }
457
458    /// Returns weighted degrees (strengths) for nodes in the graph.
459    ///
460    /// # Arguments
461    /// * `partition` - Optional filter by `Partition::Hosts` or `Partition::Parasites`.
462    ///
463    /// # Returns
464    /// Vector of tuples: `(node name, partition, strength)`.
465    pub fn weighted_degrees(&self, partition: Option<Partition>) -> Vec<(String, Partition, f64)> {
466        let graph = &self.0;
467
468        graph
469            .node_references()
470            .filter(|(_, node)| partition.map_or(true, |p| node.partition == p))
471            .map(|(node_index, node)| {
472                (
473                    node.name.clone(),
474                    node.partition,
475                    self.weighted_node_degree(node_index),
476                )
477            })
478            .collect()
479    }
480
481    /// Computes the degree distribution of the graph (unweighted or weighted).
482    ///
483    /// Uses Freedman-Diaconis rule to bin the degrees into a histogram.
484    ///
485    /// # Arguments
486    /// * `partition` - Optional filter by `Partition::Hosts` or `Partition::Parasites`.
487    /// * `weighted` - Whether to compute weighted degrees.
488    ///
489    /// # Returns
490    /// A tuple:
491    /// * Bin width (f64).
492    /// * BTreeMap where keys are bin lower bounds and values are counts.
493    pub fn degree_distribution(
494        &self,
495        partition: Option<Partition>,
496        weighted: bool,
497    ) -> (f64, BTreeMap<OrderedFloat<f64>, usize>) {
498        // 1. Get degrees or weighted degrees
499        let degrees: Vec<f64> = if weighted {
500            self.weighted_degrees(partition)
501                .into_iter()
502                .map(|(_, _, d)| d)
503                .collect()
504        } else {
505            self.degrees(partition)
506                .into_iter()
507                .map(|(_, _, d)| d as f64)
508                .collect()
509        };
510
511        if degrees.is_empty() {
512            return (0.0, BTreeMap::new());
513        }
514
515        // 2. Sort degrees for binning
516        let mut sorted_degrees = degrees.clone();
517        sorted_degrees.sort_by(|a, b| a.partial_cmp(b).unwrap());
518
519        // 3. Calculate IQR (Interquartile Range)
520        fn calculate_iqr(sorted_degrees: &[f64]) -> f64 {
521            let len = sorted_degrees.len();
522            let q1_index = len / 4;
523            let q3_index = 3 * len / 4;
524            let q1 = sorted_degrees[q1_index];
525            let q3 = sorted_degrees[q3_index];
526            q3 - q1
527        }
528
529        let iqr = calculate_iqr(&sorted_degrees);
530        let bin_width = (2.0 * iqr / (degrees.len() as f64).cbrt()).max(1.0).round();
531
532        // 4. Build histogram
533        let min_degree = *sorted_degrees.first().unwrap();
534        let mut histogram = BTreeMap::new();
535
536        for &degree in &degrees {
537            // Calculate bin, offset from min_degree
538            let bin = ((degree - min_degree) / bin_width).floor() * bin_width + min_degree;
539            histogram
540                .entry(OrderedFloat(bin))
541                .and_modify(|count| *count += 1)
542                .or_insert(1);
543        }
544
545        (bin_width, histogram)
546    }
547
548    /// Computes the bivariate degree distribution of edges.
549    ///
550    /// For each edge in the graph, returns a tuple containing the degree
551    /// of the source node and the degree of the target node.
552    ///
553    /// # Returns
554    /// Vector of `(degree_source, degree_target)` pairs.
555    pub fn bivariate_degree_distribution(&self) -> Vec<(usize, usize)> {
556        let graph = &self.0;
557
558        graph
559            .edge_references()
560            .map(|e| (self.node_degree(e.source()), self.node_degree(e.target())))
561            .collect()
562    }
563
564    /// Plots the bipartite graph as an SVG image.
565    ///
566    /// Parasites and hosts are drawn as circles at the top and bottom
567    /// of the SVG, respectively. Edges are drawn as lines between them.
568    ///
569    /// # Arguments
570    /// * `width` - Width of the SVG canvas.
571    /// * `height` - Height of the SVG canvas.
572    pub fn plot(&self, width: i32, height: i32) {
573        let graph = &self.0;
574
575        // some consts and fns
576        // scale the nodes across the bipartite layers
577        const NODE_SCALE: f64 = 4.0;
578
579        let (parasites, hosts) = &self.get_parasite_host_from_graph();
580
581        // make the circles.
582        // want them 1/3 of the way down the SVG
583        let mut parasite_nodes = String::new();
584        let mut parasite_pos = HashMap::new();
585
586        let parasite_spacing = (width as f64 - (MARGIN_LR * 2.0)) / parasites.len() as f64;
587
588        // mut i, is because we want indexing to start at 1
589        // for SVG formatting reasons. We -1 from i for x, as
590        // we want to multiply by zero for the first node.
591        for (mut i, (node, spp_name)) in parasites.iter().enumerate() {
592            i += 1;
593            // store the x and y coords of parasites
594            let x = ((i - 1) as f64 * parasite_spacing) + (parasite_spacing / 2.0) + MARGIN_LR;
595            let y = height as f64 / NODE_SCALE;
596            // for edge drawing later
597            parasite_pos.insert(*node, (x, y));
598
599            parasite_nodes += &format!(
600                "<circle cx=\"{x}\" cy=\"{y}\" r=\"6\" fill=\"green\"><title>{spp_name}</title></circle>\n{}",
601                if i >= 1 { "\t" } else { "" }
602            );
603        }
604
605        // host nodes here
606        // scaling the nodes by how many incoming connections they have.
607        let mut incoming_nodes_vec = Vec::new();
608        for (node, _) in hosts.iter() {
609            if graph.neighbors_directed(*node, Outgoing).count() > 0 {
610                continue;
611            } else {
612                incoming_nodes_vec.push(graph.neighbors_directed(*node, Incoming).count());
613            }
614        }
615
616        let mut host_nodes = String::new();
617        let mut host_pos = HashMap::new();
618
619        let host_spacing = (width as f64 - (MARGIN_LR * 2.0)) / hosts.len() as f64;
620
621        for (mut i, (node, spp_name)) in hosts.iter().enumerate() {
622            i += 1;
623            // store the x and y coords of parasites
624            let x = ((i - 1) as f64 * host_spacing) + (host_spacing / 2.0) + MARGIN_LR;
625            let y = (height as f64 / NODE_SCALE) * 3.0;
626            let r = graph.neighbors_directed(*node, Incoming).count();
627            // for edge drawing later
628            host_pos.insert(*node, (x, y));
629
630            host_nodes += &format!(
631                "<circle cx=\"{x}\" cy=\"{y}\" r=\"{}\" fill=\"red\"><title>{spp_name}</title></circle>\n{}",
632                scale_fit(
633                    r as f64,
634                    *incoming_nodes_vec.iter().min().unwrap() as f64,
635                    *incoming_nodes_vec.iter().max().unwrap() as f64
636                ) * 5.0,
637                if i >= 1 { "\t" } else { "" }
638            );
639        }
640
641        let mut edge_links = String::new();
642        // in order to scale the thickness of the lines ('fitness')
643        // I'll need to find the min/max of the edge weights
644
645        let mut fitness_vec = Vec::new();
646        for edge in graph.edge_references() {
647            fitness_vec.push(*edge.weight());
648        }
649
650        // remove these unwraps.
651        let fit_min = fitness_vec
652            .iter()
653            .min_by(|a, b| a.partial_cmp(b).unwrap())
654            .unwrap();
655        let fit_max = fitness_vec
656            .iter()
657            .max_by(|a, b| a.partial_cmp(b).unwrap())
658            .unwrap();
659
660        // now draw the edges
661        for (mut i, edge) in graph.edge_references().enumerate() {
662            i += 1;
663            let from = edge.source();
664            let to = edge.target();
665            let fitness = *edge.weight();
666
667            let (x1, y1) = parasite_pos.get(&from).unwrap();
668            // to allow for self parasitism (or association I should say)!
669            let (x2, y2) = host_pos
670                .get(&to)
671                .unwrap_or_else(|| parasite_pos.get(&from).unwrap());
672
673            edge_links += &format!(
674                "<line x1=\"{x1}\" y1=\"{y1}\" x2=\"{x2}\" y2=\"{y2}\" stroke=\"black\" stroke-width=\"{}\"/>\n{}",
675                scale_fit(fitness, *fit_min, *fit_max),
676                if i >= 1 { "\t" } else { "" }
677            );
678        }
679
680        let svg = format!(
681            r#"<svg version="1.1"
682    width="{width}" height="{height}"
683    xmlns="http://www.w3.org/2000/svg">
684    {edge_links}
685    {parasite_nodes}
686    {host_nodes}
687</svg>
688        "#
689        );
690
691        let _ = stdoutln!("{}", svg);
692    }
693
694    /// Plots the bipartite graph as a proportional diagram (Sankey-like).
695    ///
696    /// Nodes are drawn as rectangles sized according to the number of
697    /// connections. Edges are drawn as polygons representing connections
698    /// between hosts and parasites.
699    ///
700    /// # Arguments
701    /// * `width` - Width of the SVG canvas.
702    /// * `height` - Height of the SVG canvas.
703    pub fn plot_prop(&self, width: i32, height: i32) {
704        let graph = &self.0;
705
706        let canvas_width = width as f64 - (2.0 * MARGIN_LR);
707        let upper_stratum_height = height as f64 / 4.0;
708        let lower_stratum_height = height as f64 / 4.0 * 3.0;
709
710        let rect_height = 20.0;
711        let inner_margin = 3.0;
712
713        // calculate total number of parasite connections
714        let (parasites, hosts) = &self.get_parasite_host_from_graph();
715        // TODO: it's possible the hosts and parasites
716        // can be reordered here to increase visual appeal
717
718        let _ = stderrln!("Number of parasites: {}", parasites.len());
719        let _ = stderrln!("Number of hosts: {}", hosts.len());
720
721        let mut parasite_connections = vec![];
722        for (node, s) in parasites.iter() {
723            parasite_connections.push((
724                graph.neighbors_directed(*node, Incoming).count() as f64
725                    + graph.neighbors_directed(*node, Outgoing).count() as f64,
726                node,
727                s,
728            ));
729        }
730        let total_parasite_connections =
731            parasite_connections.iter().map(|(e, _, _)| e).sum::<f64>();
732
733        // now iterate over the parasites and draw the polygons
734        let mut cumsum_node_connections = 0.0;
735        let mut parasite_polygons = String::new();
736        let mut parasite_pos = HashMap::new();
737
738        let (y1_u, y2_u) = (upper_stratum_height, upper_stratum_height + rect_height);
739        for (i, (node_connections, node, parasite)) in parasite_connections.iter().enumerate() {
740            // special case for first node
741            let (x1, x2) = if i == 0 {
742                cumsum_node_connections += node_connections;
743                let pos = (
744                    MARGIN_LR,
745                    ((cumsum_node_connections / total_parasite_connections) * canvas_width)
746                        - inner_margin,
747                );
748
749                // for drawing edges
750                parasite_pos.insert(**node, (pos.0, pos.0, pos.1, y1_u, y2_u));
751
752                pos
753            } else {
754                let curr_conns = cumsum_node_connections;
755                cumsum_node_connections += node_connections;
756                let pos = (
757                    ((curr_conns / total_parasite_connections) * canvas_width) + inner_margin,
758                    ((cumsum_node_connections / total_parasite_connections) * canvas_width)
759                        - inner_margin,
760                );
761                // for drawing edges
762                parasite_pos.insert(**node, (pos.0, pos.0, pos.1, y1_u, y2_u));
763
764                pos
765            };
766
767            // now make the polygon element
768            let polygon = format!(
769                r#"<polygon points="{x2},{y1_u} {x1},{y1_u} {x1},{y2_u} {x2},{y2_u}" fill="green" stroke="black" stroke-width="1" ><title>{parasite}</title></polygon>\n{end}"#,
770                x1 = x1,
771                y1_u = y1_u,
772                x2 = x2,
773                y2_u = y2_u,
774                parasite = parasite,
775                end = if i >= 1 { "\t" } else { "" }
776            );
777
778            parasite_polygons += &polygon;
779        }
780
781        // hosts
782        let mut host_connections = vec![];
783        for (node, s) in hosts.iter() {
784            host_connections.push((
785                graph.neighbors_directed(*node, Incoming).count() as f64
786                    + graph.neighbors_directed(*node, Outgoing).count() as f64,
787                node,
788                s,
789            ));
790        }
791
792        let total_host_connections = host_connections.iter().map(|(e, _, _)| e).sum::<f64>();
793
794        // now we iterate over the hosts and draw the polygons
795        let mut cumsum_node_connections = 0.0;
796        let mut host_polygons = String::new();
797        let mut host_pos = HashMap::new();
798
799        let (y1_l, y2_l) = (lower_stratum_height, lower_stratum_height + rect_height);
800        for (i, (node_connections, node, host)) in host_connections.iter().enumerate() {
801            // special case for first node
802            let (x1, x2) = if i == 0 {
803                cumsum_node_connections += node_connections;
804                let pos = (
805                    MARGIN_LR,
806                    ((cumsum_node_connections / total_host_connections) * canvas_width)
807                        - inner_margin,
808                );
809
810                host_pos.insert(**node, (pos.0, pos.0, pos.1, y1_l, y2_l));
811
812                pos
813            } else {
814                let curr_conns = cumsum_node_connections;
815                cumsum_node_connections += node_connections;
816                let pos = (
817                    ((curr_conns / total_host_connections) * canvas_width) + inner_margin,
818                    ((cumsum_node_connections / total_host_connections) * canvas_width)
819                        - inner_margin,
820                );
821
822                host_pos.insert(**node, (pos.0, pos.0, pos.1, y1_l, y2_l));
823                pos
824            };
825
826            // now make the polygon element
827            let polygon = format!(
828                r#"<polygon points="{x2},{y1_l} {x1},{y1_l} {x1},{y2_l} {x2},{y2_l}" fill="red" stroke="black" stroke-width="1"><title>{host}</title></polygon>\n{end}"#,
829                x1 = x1,
830                y1_l = y1_l,
831                x2 = x2,
832                y2_l = y2_l,
833                host = host,
834                end = if i >= 1 { "\t" } else { "" }
835            );
836
837            host_polygons += &polygon;
838        }
839
840        // and now the edges using polygons again
841        let mut edge_polygons = String::new();
842        for (mut i, edge) in graph.edge_references().enumerate() {
843            i += 1;
844            let from = edge.source();
845            let to = edge.target();
846            let _fitness = *edge.weight();
847
848            // we need to mutate the x coords of the polygons
849            let (x1_update_p, x1_p, x2_p, _y1_p, y2_p) = parasite_pos.get_mut(&from).unwrap();
850            // FIXME: this unwrap panics; only when the graph is not bipartite
851            // I haven't been bothered yet to figure it out properly.
852            let (x1_update_h, x1_h, x2_h, y1_h, _y2_h) = host_pos.get_mut(&to).unwrap();
853
854            // get total number of connections for the parasite
855            let total_parasite_connections = graph.neighbors_directed(from, Outgoing).count()
856                as f64
857                + graph.neighbors_directed(from, Incoming).count() as f64;
858
859            // get the number of connections for the parasite from the current host
860            let p_to_h = graph
861                .neighbors_directed(from, Outgoing)
862                .filter(|e| e == &to)
863                .count();
864
865            // now scale the x coordinates of the polygons by the number of connections
866            let parasite_pos_width = *x2_p - *x1_p;
867            let current_parasite_width =
868                parasite_pos_width * (p_to_h as f64 / total_parasite_connections);
869
870            // goes from x1_update to x1_update + current_parasite_width
871            let x1_update_p_clone = x1_update_p.clone();
872            let (p_poly_1, p_poly_2) = (
873                x1_update_p_clone,
874                x1_update_p_clone + current_parasite_width,
875            );
876
877            // the hosts
878
879            // get total number of connections for the parasite
880            let total_host_connections = graph.neighbors_directed(to, Outgoing).count() as f64
881                + graph.neighbors_directed(to, Incoming).count() as f64;
882
883            let h_from_p = graph
884                .neighbors_directed(to, Incoming)
885                .filter(|e| e == &from)
886                .count();
887
888            // now scale the x coordinates of the polygons by the number of connections for the host
889            let host_pos_width = *x2_h - *x1_h;
890            let current_host_width = host_pos_width * (h_from_p as f64 / total_host_connections);
891
892            // goes from x1_update to x1_update + current_parasite_width
893            let x1_update_h_clone = x1_update_h.clone();
894            let (h_poly_1, h_poly_2) = (x1_update_h_clone, x1_update_h_clone + current_host_width);
895
896            // now update the x's
897            *x1_update_p += current_parasite_width;
898            *x1_update_h += current_host_width;
899
900            // and create the polygon edges
901            let edge_polygon = format!(
902                r#"<polygon points="{p_poly_1},{y2_p} {p_poly_2},{y2_p} {h_poly_2},{y1_h} {h_poly_1},{y1_h}" fill-opacity="50%" />\n{}"#,
903                if i >= 1 { "\t" } else { "" }
904            );
905            edge_polygons += &edge_polygon;
906        }
907
908        let svg = format!(
909            r#"<svg version="1.1"
910    width="{width}" height="{height}"
911    xmlns="http://www.w3.org/2000/svg">
912    {parasite_polygons}
913    {host_polygons}
914    {edge_polygons}
915</svg>
916        "#
917        );
918
919        let _ = stdoutln!("{}", svg);
920    }
921
922    /// Exports the bipartite graph to a tab-separated values (TSV) string.
923    ///
924    /// Output includes columns: `from`, `to`, `weight`.
925    ///
926    /// # Returns
927    /// * `Ok(String)` with TSV contents.
928    /// * `Err(DSVError)` if data is missing or invalid.
929    pub fn to_tsv(&self) -> Result<String, DSVError> {
930        let mut tsv = String::new();
931        tsv += "from\tto\tweight\n";
932
933        let g = &self.0;
934        for e in g.edge_references() {
935            let w = g.edge_weight(e.id()).ok_or(DSVError::Weight)?;
936
937            let s = g.node_weight(e.source()).ok_or(DSVError::Source)?;
938            let t = g.node_weight(e.target()).ok_or(DSVError::Target)?;
939
940            tsv += &format!("{s}\t{t}\t{w}\n");
941        }
942
943        Ok(tsv)
944    }
945}
946
947#[cfg(test)]
948mod tests {
949
950    use super::*;
951
952    // the test graph looks like this
953    //
954    //         a    b
955    //         | \ /
956    //         | /\
957    //         |/  \
958    //         c    d
959
960    fn make_graph() -> BipartiteGraph {
961        let mut graph: Graph<SpeciesNode, Fitness> = Graph::new();
962        let a = graph.add_node(SpeciesNode::new("a".into(), Partition::Parasites));
963        let b = graph.add_node(SpeciesNode::new("b".into(), Partition::Parasites));
964        let c = graph.add_node(SpeciesNode::new("c".into(), Partition::Hosts));
965        let d = graph.add_node(SpeciesNode::new("d".into(), Partition::Hosts));
966
967        graph.add_edge(a, c, 1.0);
968        graph.add_edge(a, d, 1.0);
969        graph.add_edge(b, c, 1.0);
970
971        BipartiteGraph(graph)
972    }
973
974    #[test]
975    fn test_bipartite() {
976        let g = make_graph();
977        let bp = g.is_bipartite();
978
979        match bp {
980            Strata::Yes(_) => (),
981            Strata::No => panic!(),
982        }
983    }
984
985    #[test]
986    fn test_tsv() {
987        let g = make_graph();
988
989        let tsv = g.to_tsv().unwrap();
990
991        //  from    to      weight
992        //  a       c       1
993        //  a       d       1
994        //  b       c       1
995
996        assert_eq!(
997            "from\tto\tweight\na\tc\t1\na\td\t1\nb\tc\t1\n".to_string(),
998            tsv
999        )
1000    }
1001
1002    #[test]
1003    fn test_bivariate_degree_dist() {
1004        let g = make_graph();
1005
1006        let bdd = g.bivariate_degree_distribution();
1007
1008        assert_eq!(vec![(2, 2), (2, 1), (1, 2)], bdd)
1009    }
1010
1011    // binary degree tests
1012    #[test]
1013    fn test_degrees_all() {
1014        let g = make_graph();
1015        let degrees = g.degrees(None);
1016
1017        // Expected degrees:
1018        // a -> 2 (Parasites)
1019        // b -> 1 (Parasites)
1020        // c -> 2 (Hosts)
1021        // d -> 1 (Hosts)
1022
1023        assert!(degrees.contains(&("a".to_string(), Partition::Parasites, 2)));
1024        assert!(degrees.contains(&("b".to_string(), Partition::Parasites, 1)));
1025        assert!(degrees.contains(&("c".to_string(), Partition::Hosts, 2)));
1026        assert!(degrees.contains(&("d".to_string(), Partition::Hosts, 1)));
1027    }
1028
1029    #[test]
1030    fn test_degrees_partition_hosts() {
1031        let g = make_graph();
1032        let degrees = g.degrees(Some(Partition::Hosts));
1033
1034        assert_eq!(degrees.len(), 2);
1035        assert!(degrees.contains(&("c".to_string(), Partition::Hosts, 2)));
1036        assert!(degrees.contains(&("d".to_string(), Partition::Hosts, 1)));
1037    }
1038
1039    // weighted degrees
1040    #[test]
1041    fn test_weighted_degrees_partition_parasites() {
1042        let g = make_graph();
1043        let weighted_degrees = g.weighted_degrees(Some(Partition::Parasites));
1044
1045        // a -> 2.0, b -> 1.0
1046        assert!(weighted_degrees.contains(&("a".to_string(), Partition::Parasites, 2.0)));
1047        assert!(weighted_degrees.contains(&("b".to_string(), Partition::Parasites, 1.0)));
1048    }
1049
1050    // degree distribution
1051    #[test]
1052    fn test_degree_distribution_unweighted_hosts() {
1053        let g = make_graph();
1054        let (bin_width, histogram) = g.degree_distribution(Some(Partition::Hosts), false);
1055
1056        assert!(bin_width > 0.0);
1057        eprintln!("{:?}", histogram);
1058        // Hosts degrees: c -> 2, d -> 1
1059        // Everything lumped in the same bin for this small example
1060        assert_eq!(histogram.get(&OrderedFloat(1.0)).cloned(), Some(2));
1061        // Fails for this small dataset..
1062        // assert_eq!(histogram.get(&OrderedFloat(2.0)).cloned(), Some(1));
1063    }
1064
1065    // check host/parasite partitioning
1066    #[test]
1067    fn test_get_parasite_host_from_graph() {
1068        let g = make_graph();
1069        let (parasites, hosts) = g.get_parasite_host_from_graph();
1070
1071        assert_eq!(parasites.len(), 2);
1072        assert!(parasites.iter().any(|(_, n)| n.name == "a"));
1073        assert!(parasites.iter().any(|(_, n)| n.name == "b"));
1074
1075        assert_eq!(hosts.len(), 2);
1076        assert!(hosts.iter().any(|(_, n)| n.name == "c"));
1077        assert!(hosts.iter().any(|(_, n)| n.name == "d"));
1078    }
1079
1080    // random graph generation
1081    #[test]
1082    fn test_random_graph_valid() {
1083        let g = BipartiteGraph::random(5, 5, 10).unwrap();
1084        let stats = g.stats();
1085
1086        assert_eq!(stats.no_parasites, 5);
1087        assert_eq!(stats.no_hosts, 5);
1088        assert_eq!(stats.no_edges, 10);
1089    }
1090
1091    #[test]
1092    #[should_panic(expected = "MaxEdges")]
1093    fn test_random_graph_too_many_edges() {
1094        let _ = BipartiteGraph::random(2, 2, 5).unwrap();
1095    }
1096
1097    #[test]
1098    #[should_panic(expected = "NoNodes")]
1099    fn test_random_graph_zero_nodes() {
1100        let _ = BipartiteGraph::random(0, 2, 1).unwrap();
1101    }
1102
1103    #[test]
1104    fn test_is_bipartite_simple() {
1105        let g = make_graph();
1106
1107        let bp = g.is_bipartite();
1108
1109        match bp {
1110            Strata::Yes(partition_map) => {
1111                assert_eq!(partition_map.len(), 4);
1112            }
1113            Strata::No => panic!("Graph should be bipartite but was not detected as such."),
1114        }
1115    }
1116
1117    #[test]
1118    fn test_is_bipartite_triangle_not_bipartite() {
1119        let mut graph: Graph<SpeciesNode, Fitness> = Graph::new();
1120
1121        let a = graph.add_node(SpeciesNode::new("a".into(), Partition::Parasites));
1122        let b = graph.add_node(SpeciesNode::new("b".into(), Partition::Parasites));
1123        let c = graph.add_node(SpeciesNode::new("c".into(), Partition::Parasites));
1124
1125        graph.add_edge(a, b, 1.0);
1126        graph.add_edge(b, c, 1.0);
1127        graph.add_edge(c, a, 1.0);
1128
1129        let g = BipartiteGraph(graph);
1130
1131        let bp = g.is_bipartite();
1132
1133        match bp {
1134            Strata::No => (),
1135            Strata::Yes(_) => panic!("Graph should NOT be bipartite but was detected as such."),
1136        }
1137    }
1138
1139    #[test]
1140    fn test_is_bipartite_disconnected_nodes() {
1141        let mut graph: Graph<SpeciesNode, Fitness> = Graph::new();
1142
1143        let _a = graph.add_node(SpeciesNode::new("a".into(), Partition::Parasites));
1144        let _b = graph.add_node(SpeciesNode::new("b".into(), Partition::Parasites));
1145        let _c = graph.add_node(SpeciesNode::new("c".into(), Partition::Hosts));
1146
1147        let g = BipartiteGraph(graph);
1148
1149        let bp = g.is_bipartite();
1150
1151        match bp {
1152            Strata::Yes(partition_map) => {
1153                assert_eq!(partition_map.len(), 0);
1154            }
1155            Strata::No => panic!("Disconnected graph should be bipartite."),
1156        }
1157    }
1158
1159    #[test]
1160    fn test_is_bipartite_single_node() {
1161        let mut graph: Graph<SpeciesNode, Fitness> = Graph::new();
1162
1163        let _a = graph.add_node(SpeciesNode::new("a".into(), Partition::Parasites));
1164
1165        let g = BipartiteGraph(graph);
1166
1167        let bp = g.is_bipartite();
1168
1169        match bp {
1170            Strata::Yes(partition_map) => {
1171                assert_eq!(partition_map.len(), 0);
1172            }
1173            Strata::No => panic!("Single node graph should be bipartite."),
1174        }
1175    }
1176}