Skip to main content

rust_igraph/algorithms/io/
pajek.rs

1//! Pajek (.net) I/O (ALGO-IO-005).
2//!
3//! Reads and writes graphs in a subset of the Pajek `.net` format.
4//! The format has a `*Vertices N` header followed by optional vertex
5//! labels, then `*Edges` (undirected) or `*Arcs` (directed) sections
6//! listing edges with optional weights.
7//!
8//! ```text
9//! *Vertices 4
10//! 1 "Alice"
11//! 2 "Bob"
12//! 3 "Carol"
13//! 4 "Dave"
14//! *Edges
15//! 1 2 1.0
16//! 2 3
17//! 3 4 2.5
18//! ```
19//!
20//! Limitations:
21//! - Only `.net` files (not `.paj` project files)
22//! - No temporal networks
23//! - Mixed directed/undirected not supported
24//! - Bipartite mode not supported (vertex types ignored)
25//!
26//! Counterpart of `igraph_read_graph_pajek` / `igraph_write_graph_pajek`.
27
28use std::io::{BufRead, BufReader, Read, Write};
29
30use crate::core::{Graph, IgraphError, IgraphResult};
31
32/// Result of reading a Pajek file.
33#[derive(Debug, Clone)]
34pub struct PajekGraph {
35    /// The parsed graph.
36    pub graph: Graph,
37    /// Vertex labels (one per vertex, in index order). `None` if no
38    /// vertex had a label.
39    pub labels: Option<Vec<String>>,
40    /// Edge weights (one per edge, in edge order). `None` if no edge
41    /// had a weight.
42    pub weights: Option<Vec<f64>>,
43}
44
45/// Read a graph from Pajek (`.net`) format.
46///
47/// Supports `*Vertices`, `*Edges` (undirected), and `*Arcs` (directed)
48/// sections. Vertex IDs in the file are 1-based.
49///
50/// # Examples
51///
52/// ```
53/// use rust_igraph::read_pajek;
54///
55/// let pajek = b"*Vertices 3\n1 \"A\"\n2 \"B\"\n3 \"C\"\n*Edges\n1 2\n2 3\n1 3\n";
56/// let result = read_pajek(&pajek[..]).unwrap();
57/// assert_eq!(result.graph.vcount(), 3);
58/// assert_eq!(result.graph.ecount(), 3);
59/// assert!(!result.graph.is_directed());
60/// ```
61#[allow(clippy::too_many_lines)]
62pub fn read_pajek<R: Read>(input: R) -> IgraphResult<PajekGraph> {
63    #[derive(PartialEq)]
64    enum Section {
65        None,
66        Vertices,
67        Edges,
68    }
69
70    let reader = BufReader::new(input);
71
72    let mut n_vertices: Option<u32> = None;
73    let mut directed = false;
74    let mut labels: Vec<String> = Vec::new();
75    let mut has_any_label = false;
76    let mut edges: Vec<(u32, u32)> = Vec::new();
77    let mut weights: Vec<f64> = Vec::new();
78    let mut has_any_weight = false;
79    let mut section = Section::None;
80
81    for (line_idx, line_result) in reader.lines().enumerate() {
82        let line = line_result?;
83        let trimmed = line.trim();
84
85        if trimmed.is_empty() || trimmed.starts_with('%') {
86            continue;
87        }
88
89        let lower = trimmed.to_ascii_lowercase();
90
91        // Section headers
92        if lower.starts_with("*vertices") {
93            let parts: Vec<&str> = trimmed.split_whitespace().collect();
94            if parts.len() < 2 {
95                return Err(IgraphError::Parse {
96                    line: line_idx.wrapping_add(1),
97                    message: "*Vertices line needs vertex count".into(),
98                });
99            }
100            let n: u32 = parts[1].parse().map_err(|e| IgraphError::Parse {
101                line: line_idx.wrapping_add(1),
102                message: format!("invalid vertex count: {e}"),
103            })?;
104            n_vertices = Some(n);
105            labels = vec![String::new(); n as usize];
106            section = Section::Vertices;
107            continue;
108        }
109
110        if lower.starts_with("*edges") || lower.starts_with("*edgeslist") {
111            directed = false;
112            section = Section::Edges;
113            continue;
114        }
115
116        if lower.starts_with("*arcs") || lower.starts_with("*arcslist") {
117            directed = true;
118            section = Section::Edges;
119            continue;
120        }
121
122        // Skip other section headers we don't handle
123        if lower.starts_with('*') {
124            section = Section::None;
125            continue;
126        }
127
128        match section {
129            Section::Vertices => {
130                // Format: ID "label" [x y z] [other attrs...]
131                // We only extract the ID and label
132                let (vid, label) = parse_vertex_line(trimmed, line_idx)?;
133                if let Some(n) = n_vertices {
134                    if vid == 0 || vid > n {
135                        return Err(IgraphError::Parse {
136                            line: line_idx.wrapping_add(1),
137                            message: format!("vertex id {vid} out of range [1, {n}]"),
138                        });
139                    }
140                }
141                if let Some(lbl) = label {
142                    has_any_label = true;
143                    let idx = (vid.wrapping_sub(1)) as usize;
144                    if idx < labels.len() {
145                        labels[idx] = lbl;
146                    }
147                }
148            }
149            Section::Edges => {
150                // Format: FROM TO [weight] [other attrs...]
151                let parts: Vec<&str> = trimmed.split_whitespace().collect();
152                if parts.len() < 2 {
153                    return Err(IgraphError::Parse {
154                        line: line_idx.wrapping_add(1),
155                        message: "edge line needs at least: FROM TO".into(),
156                    });
157                }
158                let from: u32 = parts[0].parse().map_err(|e| IgraphError::Parse {
159                    line: line_idx.wrapping_add(1),
160                    message: format!("invalid source id: {e}"),
161                })?;
162                let to: u32 = parts[1].parse().map_err(|e| IgraphError::Parse {
163                    line: line_idx.wrapping_add(1),
164                    message: format!("invalid target id: {e}"),
165                })?;
166
167                if from == 0 || to == 0 {
168                    return Err(IgraphError::Parse {
169                        line: line_idx.wrapping_add(1),
170                        message: "vertex IDs are 1-based in Pajek format".into(),
171                    });
172                }
173
174                edges.push((from.wrapping_sub(1), to.wrapping_sub(1)));
175
176                let weight = if parts.len() >= 3 {
177                    match parts[2].parse::<f64>() {
178                        Ok(w) => {
179                            has_any_weight = true;
180                            w
181                        }
182                        Err(_) => 0.0,
183                    }
184                } else {
185                    0.0
186                };
187                weights.push(weight);
188            }
189            Section::None => {
190                // Ignore lines in unknown sections
191            }
192        }
193    }
194
195    let n = n_vertices.ok_or_else(|| IgraphError::Parse {
196        line: 0,
197        message: "no *Vertices line found".into(),
198    })?;
199
200    let mut graph = Graph::new(n, directed)?;
201    graph.add_edges(edges)?;
202
203    Ok(PajekGraph {
204        graph,
205        labels: if has_any_label { Some(labels) } else { None },
206        weights: if has_any_weight { Some(weights) } else { None },
207    })
208}
209
210/// Write a graph in Pajek (`.net`) format.
211///
212/// Writes `*Vertices` section (with optional labels), then `*Edges`
213/// (undirected) or `*Arcs` (directed) section with optional weights.
214///
215/// # Examples
216///
217/// ```
218/// use rust_igraph::{Graph, write_pajek};
219///
220/// let mut g = Graph::with_vertices(3);
221/// g.add_edge(0, 1).unwrap();
222/// g.add_edge(1, 2).unwrap();
223///
224/// let labels = vec!["A".to_string(), "B".to_string(), "C".to_string()];
225/// let mut buf = Vec::new();
226/// write_pajek(&g, Some(&labels), None, &mut buf).unwrap();
227/// let s = String::from_utf8(buf).unwrap();
228/// assert!(s.contains("*Vertices 3"));
229/// assert!(s.contains("\"A\""));
230/// assert!(s.contains("*Edges"));
231/// ```
232pub fn write_pajek<W: Write>(
233    graph: &Graph,
234    labels: Option<&[String]>,
235    weights: Option<&[f64]>,
236    writer: &mut W,
237) -> IgraphResult<()> {
238    if let Some(l) = labels {
239        if l.len() != graph.vcount() as usize {
240            return Err(IgraphError::InvalidArgument(format!(
241                "labels length {} does not match vcount {}",
242                l.len(),
243                graph.vcount()
244            )));
245        }
246    }
247    if let Some(w) = weights {
248        if w.len() != graph.ecount() {
249            return Err(IgraphError::InvalidArgument(format!(
250                "weights length {} does not match ecount {}",
251                w.len(),
252                graph.ecount()
253            )));
254        }
255    }
256
257    writeln!(writer, "*Vertices {}", graph.vcount())?;
258    for v in 0..graph.vcount() {
259        let label = match labels {
260            Some(l) => format!("\"{}\"", escape_pajek_string(&l[v as usize])),
261            None => format!("\"{}\"", v + 1),
262        };
263        writeln!(writer, "{} {label}", v + 1)?;
264    }
265
266    if graph.is_directed() {
267        writeln!(writer, "*Arcs")?;
268    } else {
269        writeln!(writer, "*Edges")?;
270    }
271
272    for eid in 0..graph.ecount() {
273        #[allow(clippy::cast_possible_truncation)]
274        let (from, to) = graph.edge(eid as u32)?;
275        match weights {
276            Some(w) => writeln!(writer, "{} {} {}", from + 1, to + 1, w[eid])?,
277            None => writeln!(writer, "{} {}", from + 1, to + 1)?,
278        }
279    }
280
281    Ok(())
282}
283
284fn parse_vertex_line(line: &str, line_idx: usize) -> IgraphResult<(u32, Option<String>)> {
285    let mut chars = line.chars().peekable();
286
287    // Skip leading whitespace
288    while chars.peek().is_some_and(|c| c.is_whitespace()) {
289        chars.next();
290    }
291
292    // Read vertex ID (digits)
293    let mut id_str = String::new();
294    while chars.peek().is_some_and(char::is_ascii_digit) {
295        let Some(c) = chars.next() else { break };
296        id_str.push(c);
297    }
298
299    if id_str.is_empty() {
300        return Err(IgraphError::Parse {
301            line: line_idx.wrapping_add(1),
302            message: "expected vertex ID".into(),
303        });
304    }
305
306    let vid: u32 = id_str.parse().map_err(|e| IgraphError::Parse {
307        line: line_idx.wrapping_add(1),
308        message: format!("invalid vertex id: {e}"),
309    })?;
310
311    // Skip whitespace
312    while chars.peek().is_some_and(|c| c.is_whitespace()) {
313        chars.next();
314    }
315
316    // Try to read a quoted label
317    let label = if chars.peek() == Some(&'"') {
318        chars.next(); // consume opening quote
319        let mut lbl = String::new();
320        loop {
321            match chars.next() {
322                Some('"') | None => break,
323                Some('\\') => {
324                    if let Some(c) = chars.next() {
325                        lbl.push(c);
326                    }
327                }
328                Some(c) => lbl.push(c),
329            }
330        }
331        Some(lbl)
332    } else {
333        // Unquoted label: take next non-whitespace token
334        let mut lbl = String::new();
335        while chars.peek().is_some_and(|c| !c.is_whitespace()) {
336            let Some(c) = chars.next() else { break };
337            lbl.push(c);
338        }
339        if lbl.is_empty() { None } else { Some(lbl) }
340    };
341
342    Ok((vid, label))
343}
344
345fn escape_pajek_string(s: &str) -> String {
346    let mut out = String::with_capacity(s.len());
347    for c in s.chars() {
348        match c {
349            '"' => out.push_str("\\\""),
350            '\\' => out.push_str("\\\\"),
351            _ => out.push(c),
352        }
353    }
354    out
355}
356
357#[cfg(test)]
358mod tests {
359    use super::*;
360
361    #[test]
362    fn test_basic_undirected() {
363        let input = b"*Vertices 3\n1 \"A\"\n2 \"B\"\n3 \"C\"\n*Edges\n1 2\n2 3\n1 3\n";
364        let result = read_pajek(&input[..]).unwrap();
365        assert_eq!(result.graph.vcount(), 3);
366        assert_eq!(result.graph.ecount(), 3);
367        assert!(!result.graph.is_directed());
368        let labels = result.labels.unwrap();
369        assert_eq!(labels, vec!["A", "B", "C"]);
370    }
371
372    #[test]
373    fn test_directed_arcs() {
374        let input = b"*Vertices 2\n*Arcs\n1 2\n2 1\n";
375        let result = read_pajek(&input[..]).unwrap();
376        assert!(result.graph.is_directed());
377        assert_eq!(result.graph.ecount(), 2);
378    }
379
380    #[test]
381    fn test_weighted_edges() {
382        let input = b"*Vertices 3\n*Edges\n1 2 1.5\n2 3 2.5\n";
383        let result = read_pajek(&input[..]).unwrap();
384        let w = result.weights.unwrap();
385        assert!((w[0] - 1.5).abs() < 1e-10);
386        assert!((w[1] - 2.5).abs() < 1e-10);
387    }
388
389    #[test]
390    fn test_no_labels() {
391        let input = b"*Vertices 2\n*Edges\n1 2\n";
392        let result = read_pajek(&input[..]).unwrap();
393        assert!(result.labels.is_none());
394    }
395
396    #[test]
397    fn test_no_weights() {
398        let input = b"*Vertices 2\n*Edges\n1 2\n";
399        let result = read_pajek(&input[..]).unwrap();
400        assert!(result.weights.is_none());
401    }
402
403    #[test]
404    fn test_mixed_weights() {
405        let input = b"*Vertices 3\n*Edges\n1 2 3.0\n2 3\n";
406        let result = read_pajek(&input[..]).unwrap();
407        let w = result.weights.unwrap();
408        assert!((w[0] - 3.0).abs() < 1e-10);
409        assert!((w[1] - 0.0).abs() < 1e-10);
410    }
411
412    #[test]
413    fn test_comment_lines_skipped() {
414        let input = b"% comment\n*Vertices 2\n% another\n*Edges\n1 2\n";
415        let result = read_pajek(&input[..]).unwrap();
416        assert_eq!(result.graph.vcount(), 2);
417        assert_eq!(result.graph.ecount(), 1);
418    }
419
420    #[test]
421    fn test_case_insensitive_sections() {
422        let input = b"*vertices 2\n*edges\n1 2\n";
423        let result = read_pajek(&input[..]).unwrap();
424        assert_eq!(result.graph.vcount(), 2);
425    }
426
427    #[test]
428    fn test_no_vertices_error() {
429        let input = b"*Edges\n1 2\n";
430        let result = read_pajek(&input[..]);
431        assert!(result.is_err());
432    }
433
434    #[test]
435    fn test_zero_vertex_id_error() {
436        let input = b"*Vertices 2\n*Edges\n0 1\n";
437        let result = read_pajek(&input[..]);
438        assert!(result.is_err());
439    }
440
441    #[test]
442    fn test_vertex_id_out_of_range_error() {
443        let input = b"*Vertices 2\n1 \"A\"\n5 \"E\"\n*Edges\n1 2\n";
444        let result = read_pajek(&input[..]);
445        assert!(result.is_err());
446    }
447
448    #[test]
449    fn test_self_loop() {
450        let input = b"*Vertices 2\n*Edges\n1 1\n";
451        let result = read_pajek(&input[..]).unwrap();
452        assert_eq!(result.graph.ecount(), 1);
453    }
454
455    #[test]
456    fn test_empty_graph() {
457        let input = b"*Vertices 5\n*Edges\n";
458        let result = read_pajek(&input[..]).unwrap();
459        assert_eq!(result.graph.vcount(), 5);
460        assert_eq!(result.graph.ecount(), 0);
461    }
462
463    #[test]
464    fn test_quoted_label_with_escape() {
465        let input = b"*Vertices 1\n1 \"hello \\\"world\\\"\"\n*Edges\n";
466        let result = read_pajek(&input[..]).unwrap();
467        let labels = result.labels.unwrap();
468        assert_eq!(labels[0], "hello \"world\"");
469    }
470
471    #[test]
472    fn test_write_undirected() {
473        let mut g = Graph::with_vertices(3);
474        g.add_edge(0, 1).unwrap();
475        g.add_edge(1, 2).unwrap();
476
477        let labels = vec!["A".to_string(), "B".to_string(), "C".to_string()];
478        let mut buf = Vec::new();
479        write_pajek(&g, Some(&labels), None, &mut buf).unwrap();
480        let s = String::from_utf8(buf).unwrap();
481        assert!(s.contains("*Vertices 3"));
482        assert!(s.contains("1 \"A\""));
483        assert!(s.contains("2 \"B\""));
484        assert!(s.contains("3 \"C\""));
485        assert!(s.contains("*Edges"));
486        assert!(s.contains("1 2\n"));
487        assert!(s.contains("2 3\n"));
488    }
489
490    #[test]
491    fn test_write_directed() {
492        let mut g = Graph::new(2, true).unwrap();
493        g.add_edge(0, 1).unwrap();
494
495        let mut buf = Vec::new();
496        write_pajek(&g, None, None, &mut buf).unwrap();
497        let s = String::from_utf8(buf).unwrap();
498        assert!(s.contains("*Arcs"));
499    }
500
501    #[test]
502    fn test_write_weighted() {
503        let mut g = Graph::with_vertices(2);
504        g.add_edge(0, 1).unwrap();
505
506        let weights = vec![4.5];
507        let mut buf = Vec::new();
508        write_pajek(&g, None, Some(&weights), &mut buf).unwrap();
509        let s = String::from_utf8(buf).unwrap();
510        assert!(s.contains("1 2 4.5"));
511    }
512
513    #[test]
514    fn test_write_labels_mismatch_error() {
515        let g = Graph::with_vertices(3);
516        let labels = vec!["A".to_string()];
517        let mut buf = Vec::new();
518        assert!(write_pajek(&g, Some(&labels), None, &mut buf).is_err());
519    }
520
521    #[test]
522    fn test_write_weights_mismatch_error() {
523        let mut g = Graph::with_vertices(2);
524        g.add_edge(0, 1).unwrap();
525        let weights = vec![1.0, 2.0];
526        let mut buf = Vec::new();
527        assert!(write_pajek(&g, None, Some(&weights), &mut buf).is_err());
528    }
529
530    #[test]
531    fn test_round_trip() {
532        let input = b"*Vertices 4\n1 \"Alice\"\n2 \"Bob\"\n3 \"Carol\"\n4 \"Dave\"\n*Edges\n1 2 1.0\n2 3 2.0\n3 4 3.0\n";
533        let result = read_pajek(&input[..]).unwrap();
534
535        let mut buf = Vec::new();
536        write_pajek(
537            &result.graph,
538            result.labels.as_deref(),
539            result.weights.as_deref(),
540            &mut buf,
541        )
542        .unwrap();
543
544        let result2 = read_pajek(&buf[..]).unwrap();
545        assert_eq!(result2.graph.vcount(), result.graph.vcount());
546        assert_eq!(result2.graph.ecount(), result.graph.ecount());
547        assert_eq!(result2.labels, result.labels);
548    }
549
550    #[test]
551    fn test_blank_lines_skipped() {
552        let input = b"\n*Vertices 2\n\n1 \"X\"\n\n*Edges\n\n1 2\n\n";
553        let result = read_pajek(&input[..]).unwrap();
554        assert_eq!(result.graph.vcount(), 2);
555        assert_eq!(result.graph.ecount(), 1);
556    }
557}