Skip to main content

rust_igraph/algorithms/io/
ncol.rs

1//! NCOL (LGL edge list) I/O (ALGO-IO-002).
2//!
3//! Reads and writes graphs in the `.ncol` format used by the Large Graph
4//! Layout (LGL) program. This is a symbolic weighted edge list: one edge
5//! per line, two vertex names separated by whitespace, optionally followed
6//! by an edge weight.
7//!
8//! The resulting graph is always **undirected**. Vertex names are mapped to
9//! internal indices in first-occurrence order.
10//!
11//! Counterpart of `igraph_read_graph_ncol` / `igraph_write_graph_ncol`.
12
13use std::collections::HashMap;
14use std::io::{BufRead, BufReader, Read, Write};
15
16use crate::core::{Graph, IgraphError, IgraphResult};
17
18/// Result of reading an NCOL file: the graph plus optional metadata.
19#[derive(Debug, Clone)]
20pub struct NcolGraph {
21    /// The parsed graph (always undirected).
22    pub graph: Graph,
23    /// Vertex names in index order (name at position `i` is the name of
24    /// vertex `i`).
25    pub names: Vec<String>,
26    /// Edge weights (one per edge, in edge order). `None` if no edge had
27    /// a weight specified. If some edges have weights and some don't, the
28    /// missing weights default to 0.0.
29    pub weights: Option<Vec<f64>>,
30}
31
32/// Read a graph from NCOL (`.ncol`) format.
33///
34/// Each non-empty, non-comment line defines an edge:
35/// ```text
36/// vertex1 vertex2 [weight]
37/// ```
38///
39/// Lines starting with `#` or `%` are comments. Vertex names are arbitrary
40/// non-whitespace strings. The graph is always undirected.
41///
42/// A line with a single name creates an isolated vertex (igraph extension).
43///
44/// # Examples
45///
46/// ```
47/// use rust_igraph::read_ncol;
48///
49/// let ncol = b"Alice Bob 1.0\nBob Carol\nAlice Carol 2.5\n";
50/// let result = read_ncol(&ncol[..]).unwrap();
51/// assert_eq!(result.graph.vcount(), 3);
52/// assert_eq!(result.graph.ecount(), 3);
53/// assert_eq!(result.names, vec!["Alice", "Bob", "Carol"]);
54/// assert!(result.weights.is_some());
55/// ```
56pub fn read_ncol<R: Read>(input: R) -> IgraphResult<NcolGraph> {
57    let reader = BufReader::new(input);
58    let mut name_map: HashMap<String, u32> = HashMap::new();
59    let mut names: Vec<String> = Vec::new();
60    let mut edges: Vec<(u32, u32)> = Vec::new();
61    let mut weights: Vec<f64> = Vec::new();
62    let mut has_any_weight = false;
63
64    for (line_idx, line_result) in reader.lines().enumerate() {
65        let line = line_result?;
66        let trimmed = line.trim();
67
68        if trimmed.is_empty() || trimmed.starts_with('#') || trimmed.starts_with('%') {
69            continue;
70        }
71
72        let mut parts = trimmed.split_whitespace();
73
74        let vertex_a = parts.next().ok_or_else(|| IgraphError::Parse {
75            line: line_idx.wrapping_add(1),
76            message: "empty edge line".into(),
77        })?;
78
79        let id1 = get_or_insert_name(vertex_a, &mut name_map, &mut names);
80
81        // If there's no second token, this is an isolated vertex declaration
82        let Some(vertex_b) = parts.next() else {
83            continue;
84        };
85
86        let id2 = get_or_insert_name(vertex_b, &mut name_map, &mut names);
87        edges.push((id1, id2));
88
89        // Optional weight
90        let weight = if let Some(w_str) = parts.next() {
91            let w = w_str.parse::<f64>().map_err(|e| IgraphError::Parse {
92                line: line_idx.wrapping_add(1),
93                message: format!("invalid weight '{w_str}': {e}"),
94            })?;
95            has_any_weight = true;
96            w
97        } else {
98            0.0
99        };
100        weights.push(weight);
101    }
102
103    #[allow(clippy::cast_possible_truncation)]
104    let n = names.len() as u32;
105    let mut graph = Graph::with_vertices(n);
106    graph.add_edges(edges)?;
107
108    Ok(NcolGraph {
109        graph,
110        names,
111        weights: if has_any_weight { Some(weights) } else { None },
112    })
113}
114
115/// Write a graph in NCOL format.
116///
117/// If `names` is provided, uses them as vertex labels; otherwise uses
118/// numeric ids as strings. If `weights` is provided (one per edge),
119/// appends the weight after each edge.
120///
121/// # Examples
122///
123/// ```
124/// use rust_igraph::{Graph, write_ncol};
125///
126/// let mut g = Graph::with_vertices(3);
127/// g.add_edge(0, 1).unwrap();
128/// g.add_edge(1, 2).unwrap();
129///
130/// let names = vec!["A".to_string(), "B".to_string(), "C".to_string()];
131/// let mut buf = Vec::new();
132/// write_ncol(&g, Some(&names), None, &mut buf).unwrap();
133/// let s = String::from_utf8(buf).unwrap();
134/// assert!(s.contains("A B"));
135/// assert!(s.contains("B C"));
136/// ```
137pub fn write_ncol<W: Write>(
138    graph: &Graph,
139    names: Option<&[String]>,
140    weights: Option<&[f64]>,
141    writer: &mut W,
142) -> IgraphResult<()> {
143    if let Some(n) = names {
144        if n.len() != graph.vcount() as usize {
145            return Err(IgraphError::InvalidArgument(format!(
146                "names length {} does not match vcount {}",
147                n.len(),
148                graph.vcount()
149            )));
150        }
151    }
152    if let Some(w) = weights {
153        if w.len() != graph.ecount() {
154            return Err(IgraphError::InvalidArgument(format!(
155                "weights length {} does not match ecount {}",
156                w.len(),
157                graph.ecount()
158            )));
159        }
160    }
161
162    for eid in 0..graph.ecount() {
163        #[allow(clippy::cast_possible_truncation)]
164        let (src, tgt) = graph.edge(eid as u32)?;
165
166        let src_name = match names {
167            Some(n) => n[src as usize].as_str().to_owned(),
168            None => src.to_string(),
169        };
170        let tgt_name = match names {
171            Some(n) => n[tgt as usize].as_str().to_owned(),
172            None => tgt.to_string(),
173        };
174
175        match weights {
176            Some(w) => writeln!(writer, "{src_name} {tgt_name} {}", w[eid])?,
177            None => writeln!(writer, "{src_name} {tgt_name}")?,
178        }
179    }
180
181    Ok(())
182}
183
184fn get_or_insert_name(name: &str, map: &mut HashMap<String, u32>, names: &mut Vec<String>) -> u32 {
185    if let Some(&id) = map.get(name) {
186        id
187    } else {
188        #[allow(clippy::cast_possible_truncation)]
189        let id = names.len() as u32;
190        map.insert(name.to_owned(), id);
191        names.push(name.to_owned());
192        id
193    }
194}
195
196#[cfg(test)]
197mod tests {
198    use super::*;
199
200    #[test]
201    fn test_empty() {
202        let result = read_ncol(&b""[..]).unwrap();
203        assert_eq!(result.graph.vcount(), 0);
204        assert_eq!(result.graph.ecount(), 0);
205        assert!(result.names.is_empty());
206        assert!(result.weights.is_none());
207    }
208
209    #[test]
210    fn test_basic_unweighted() {
211        let input = b"a b\nb c\n";
212        let result = read_ncol(&input[..]).unwrap();
213        assert_eq!(result.graph.vcount(), 3);
214        assert_eq!(result.graph.ecount(), 2);
215        assert_eq!(result.names, vec!["a", "b", "c"]);
216        assert!(result.weights.is_none());
217    }
218
219    #[test]
220    fn test_weighted() {
221        let input = b"a b 1.5\nb c 2.0\n";
222        let result = read_ncol(&input[..]).unwrap();
223        assert_eq!(result.graph.ecount(), 2);
224        let w = result.weights.unwrap();
225        assert!((w[0] - 1.5).abs() < 1e-10);
226        assert!((w[1] - 2.0).abs() < 1e-10);
227    }
228
229    #[test]
230    fn test_mixed_weights() {
231        let input = b"a b 3.0\nb c\n";
232        let result = read_ncol(&input[..]).unwrap();
233        let w = result.weights.unwrap();
234        assert!((w[0] - 3.0).abs() < 1e-10);
235        assert!((w[1] - 0.0).abs() < 1e-10);
236    }
237
238    #[test]
239    fn test_comments_and_blanks() {
240        let input = b"# comment\n\n% another comment\na b\n";
241        let result = read_ncol(&input[..]).unwrap();
242        assert_eq!(result.graph.vcount(), 2);
243        assert_eq!(result.graph.ecount(), 1);
244    }
245
246    #[test]
247    fn test_isolated_vertex() {
248        let input = b"a b\nc\n";
249        let result = read_ncol(&input[..]).unwrap();
250        assert_eq!(result.graph.vcount(), 3);
251        assert_eq!(result.graph.ecount(), 1);
252        assert_eq!(result.names[2], "c");
253    }
254
255    #[test]
256    fn test_self_loop() {
257        let input = b"a a\n";
258        let result = read_ncol(&input[..]).unwrap();
259        assert_eq!(result.graph.vcount(), 1);
260        assert_eq!(result.graph.ecount(), 1);
261    }
262
263    #[test]
264    fn test_always_undirected() {
265        let input = b"x y\ny x\n";
266        let result = read_ncol(&input[..]).unwrap();
267        assert!(!result.graph.is_directed());
268        assert_eq!(result.graph.ecount(), 2); // multi-edge allowed
269    }
270
271    #[test]
272    fn test_negative_weight() {
273        let input = b"a b -1.5\n";
274        let result = read_ncol(&input[..]).unwrap();
275        let w = result.weights.unwrap();
276        assert!((w[0] - (-1.5)).abs() < 1e-10);
277    }
278
279    #[test]
280    fn test_scientific_notation_weight() {
281        let input = b"a b 1.5e-3\n";
282        let result = read_ncol(&input[..]).unwrap();
283        let w = result.weights.unwrap();
284        assert!((w[0] - 0.0015).abs() < 1e-10);
285    }
286
287    #[test]
288    fn test_invalid_weight_error() {
289        let input = b"a b notanumber\n";
290        let result = read_ncol(&input[..]);
291        assert!(result.is_err());
292    }
293
294    #[test]
295    fn test_write_unweighted() {
296        let mut g = Graph::with_vertices(3);
297        g.add_edge(0, 1).unwrap();
298        g.add_edge(1, 2).unwrap();
299
300        let names = vec!["A".to_string(), "B".to_string(), "C".to_string()];
301        let mut buf = Vec::new();
302        write_ncol(&g, Some(&names), None, &mut buf).unwrap();
303        let s = String::from_utf8(buf).unwrap();
304        assert!(s.contains("A B\n"));
305        assert!(s.contains("B C\n"));
306    }
307
308    #[test]
309    fn test_write_weighted() {
310        let mut g = Graph::with_vertices(2);
311        g.add_edge(0, 1).unwrap();
312
313        let names = vec!["x".to_string(), "y".to_string()];
314        let weights = vec![2.75];
315        let mut buf = Vec::new();
316        write_ncol(&g, Some(&names), Some(&weights), &mut buf).unwrap();
317        let s = String::from_utf8(buf).unwrap();
318        assert!(s.contains("x y 2.75"));
319    }
320
321    #[test]
322    fn test_write_numeric_names() {
323        let mut g = Graph::with_vertices(2);
324        g.add_edge(0, 1).unwrap();
325
326        let mut buf = Vec::new();
327        write_ncol(&g, None, None, &mut buf).unwrap();
328        let s = String::from_utf8(buf).unwrap();
329        assert!(s.contains("0 1"));
330    }
331
332    #[test]
333    fn test_round_trip() {
334        let input = b"Alice Bob 1.0\nBob Carol 2.0\nAlice Carol 3.0\n";
335        let result = read_ncol(&input[..]).unwrap();
336
337        let mut buf = Vec::new();
338        write_ncol(
339            &result.graph,
340            Some(&result.names),
341            result.weights.as_deref(),
342            &mut buf,
343        )
344        .unwrap();
345
346        let result2 = read_ncol(&buf[..]).unwrap();
347        assert_eq!(result2.graph.vcount(), result.graph.vcount());
348        assert_eq!(result2.graph.ecount(), result.graph.ecount());
349        assert_eq!(result2.names, result.names);
350    }
351
352    #[test]
353    fn test_write_names_length_mismatch() {
354        let g = Graph::with_vertices(3);
355        let names = vec!["A".to_string()];
356        let mut buf = Vec::new();
357        assert!(write_ncol(&g, Some(&names), None, &mut buf).is_err());
358    }
359
360    #[test]
361    fn test_write_weights_length_mismatch() {
362        let mut g = Graph::with_vertices(2);
363        g.add_edge(0, 1).unwrap();
364        let weights = vec![1.0, 2.0]; // 2 weights but only 1 edge
365        let mut buf = Vec::new();
366        assert!(write_ncol(&g, None, Some(&weights), &mut buf).is_err());
367    }
368}