Skip to main content

rust_igraph/algorithms/io/
gml.rs

1//! GML (Graph Modelling Language) I/O (ALGO-IO-001).
2//!
3//! Reads and writes graphs in GML format. GML is a hierarchical key-value
4//! format widely used in network science tools.
5//!
6//! Reference: <https://web.archive.org/web/20190207140002/http://www.fim.uni-passau.de/index.php?id=17297&L=1>
7//!
8//! We implement the structural subset: `graph [ directed 0/1  node [ id N ]
9//! edge [ source N target N ] ]`. Non-structural attributes (labels, weights,
10//! coordinates) are silently skipped during reading and not emitted during
11//! writing. This matches the igraph C `igraph_read_graph_gml` behaviour for
12//! graphs without an attribute handler installed.
13
14use std::io::{BufRead, BufReader, Read, Write};
15
16use crate::core::{Graph, IgraphError, IgraphResult};
17
18/// Read a graph from GML format.
19///
20/// Parses the first `graph [ ... ]` block. Extracts `directed` (0 or 1,
21/// default 0), `node [ id N ]` entries, and `edge [ source N target N ]`
22/// entries. All other keys/values are skipped.
23///
24/// Node ids need not be contiguous or start at 0 — the reader maps them
25/// to internal vertex indices in the order they appear.
26///
27/// # Examples
28///
29/// ```
30/// use rust_igraph::{Graph, read_gml};
31///
32/// let gml = b"graph [\n  directed 0\n  node [ id 1 ]\n  node [ id 2 ]\n  node [ id 3 ]\n  edge [ source 1 target 2 ]\n  edge [ source 2 target 3 ]\n]";
33/// let g = read_gml(&gml[..]).unwrap();
34/// assert_eq!(g.vcount(), 3);
35/// assert_eq!(g.ecount(), 2);
36/// assert!(!g.is_directed());
37/// ```
38pub fn read_gml<R: Read>(input: R) -> IgraphResult<Graph> {
39    let reader = BufReader::new(input);
40    let tokens = tokenize(reader)?;
41    parse_graph(&tokens)
42}
43
44/// Write a graph in GML format.
45///
46/// Produces a valid GML file with `graph [ directed 0/1 node [ id N ] ...
47/// edge [ source N target N ] ... ]`. Node ids are the internal vertex
48/// indices (0-based).
49///
50/// # Examples
51///
52/// ```
53/// use rust_igraph::{Graph, write_gml, read_gml};
54///
55/// let mut g = Graph::new(3, true).unwrap();
56/// g.add_edge(0, 1).unwrap();
57/// g.add_edge(1, 2).unwrap();
58///
59/// let mut buf = Vec::new();
60/// write_gml(&g, &mut buf).unwrap();
61/// let s = String::from_utf8(buf).unwrap();
62/// assert!(s.contains("directed 1"));
63///
64/// // Round-trip
65/// let g2 = read_gml(s.as_bytes()).unwrap();
66/// assert_eq!(g2.vcount(), 3);
67/// assert_eq!(g2.ecount(), 2);
68/// assert!(g2.is_directed());
69/// ```
70pub fn write_gml<W: Write>(graph: &Graph, writer: &mut W) -> IgraphResult<()> {
71    writeln!(writer, "graph")?;
72    writeln!(writer, "[")?;
73    writeln!(writer, "  directed {}", i32::from(graph.is_directed()))?;
74
75    for v in 0..graph.vcount() {
76        writeln!(writer, "  node")?;
77        writeln!(writer, "  [")?;
78        writeln!(writer, "    id {v}")?;
79        writeln!(writer, "  ]")?;
80    }
81
82    for eid in 0..graph.ecount() {
83        #[allow(clippy::cast_possible_truncation)]
84        let (src, tgt) = graph.edge(eid as u32)?;
85        writeln!(writer, "  edge")?;
86        writeln!(writer, "  [")?;
87        writeln!(writer, "    source {src}")?;
88        writeln!(writer, "    target {tgt}")?;
89        writeln!(writer, "  ]")?;
90    }
91
92    writeln!(writer, "]")?;
93    Ok(())
94}
95
96// --- Tokenizer ---
97
98#[derive(Debug, Clone, PartialEq)]
99enum Token {
100    Key(String),
101    Integer(i64),
102    Float(f64),
103    Str(String),
104    Open,  // [
105    Close, // ]
106}
107
108fn tokenize<R: BufRead>(reader: R) -> IgraphResult<Vec<Token>> {
109    let mut tokens = Vec::new();
110    let mut line_no: usize = 0;
111
112    for line_result in reader.lines() {
113        line_no = line_no.wrapping_add(1);
114        let line = line_result?;
115        let trimmed = line.trim();
116
117        // Skip comment lines (igraph C treats "comment" as a key, but
118        // lines starting with # are sometimes used informally)
119        if trimmed.is_empty() {
120            continue;
121        }
122
123        let mut chars = trimmed.chars().peekable();
124        while let Some(&ch) = chars.peek() {
125            match ch {
126                ' ' | '\t' | '\r' => {
127                    chars.next();
128                }
129                '[' => {
130                    tokens.push(Token::Open);
131                    chars.next();
132                }
133                ']' => {
134                    tokens.push(Token::Close);
135                    chars.next();
136                }
137                '"' => {
138                    chars.next(); // consume opening quote
139                    let mut s = String::new();
140                    let mut escaped = false;
141                    loop {
142                        match chars.next() {
143                            None => {
144                                return Err(IgraphError::Parse {
145                                    line: line_no,
146                                    message: "unterminated string".into(),
147                                });
148                            }
149                            Some('\\') if !escaped => {
150                                escaped = true;
151                            }
152                            Some('"') if !escaped => break,
153                            Some(c) => {
154                                if escaped {
155                                    match c {
156                                        'n' => s.push('\n'),
157                                        't' => s.push('\t'),
158                                        '\\' => s.push('\\'),
159                                        '"' => s.push('"'),
160                                        _ => {
161                                            s.push('\\');
162                                            s.push(c);
163                                        }
164                                    }
165                                    escaped = false;
166                                } else {
167                                    s.push(c);
168                                }
169                            }
170                        }
171                    }
172                    // Decode basic XML entities
173                    let decoded = decode_entities(&s);
174                    tokens.push(Token::Str(decoded));
175                }
176                '#' => break, // rest of line is comment
177                _ => {
178                    // Collect a word (key or number)
179                    let mut word = String::new();
180                    while let Some(&c) = chars.peek() {
181                        if c == ' ' || c == '\t' || c == '[' || c == ']' || c == '"' {
182                            break;
183                        }
184                        word.push(c);
185                        chars.next();
186                    }
187
188                    if word.is_empty() {
189                        return Err(IgraphError::Parse {
190                            line: line_no,
191                            message: format!("unexpected character '{ch}'"),
192                        });
193                    }
194
195                    // Try integer first, then float, otherwise it's a key
196                    if let Ok(i) = word.parse::<i64>() {
197                        tokens.push(Token::Integer(i));
198                    } else if let Ok(f) = parse_gml_float(&word) {
199                        tokens.push(Token::Float(f));
200                    } else {
201                        tokens.push(Token::Key(word));
202                    }
203                }
204            }
205        }
206    }
207
208    Ok(tokens)
209}
210
211fn parse_gml_float(s: &str) -> Result<f64, ()> {
212    let lower = s.to_ascii_lowercase();
213    match lower.as_str() {
214        "inf" | "+inf" => Ok(f64::INFINITY),
215        "-inf" => Ok(f64::NEG_INFINITY),
216        "nan" => Ok(f64::NAN),
217        _ => s.parse::<f64>().map_err(|_| ()),
218    }
219}
220
221fn decode_entities(s: &str) -> String {
222    s.replace("&amp;", "&")
223        .replace("&quot;", "\"")
224        .replace("&apos;", "'")
225        .replace("&lt;", "<")
226        .replace("&gt;", ">")
227}
228
229// --- Parser ---
230
231fn parse_graph(tokens: &[Token]) -> IgraphResult<Graph> {
232    // Find "graph" "[" ... "]"
233    let mut pos = 0;
234
235    // Skip top-level keys until we find "graph"
236    while pos < tokens.len() {
237        match &tokens[pos] {
238            Token::Key(k) if k == "graph" => {
239                pos += 1;
240                break;
241            }
242            Token::Key(_) => {
243                pos += 1;
244                // Skip the value (could be a number, string, or nested block)
245                pos = skip_value(tokens, pos);
246            }
247            _ => {
248                pos += 1;
249            }
250        }
251    }
252
253    // Expect "["
254    if pos >= tokens.len() {
255        return Err(IgraphError::Parse {
256            line: 0,
257            message: "no 'graph' block found in GML input".into(),
258        });
259    }
260    if tokens[pos] != Token::Open {
261        return Err(IgraphError::Parse {
262            line: 0,
263            message: "expected '[' after 'graph'".into(),
264        });
265    }
266    pos += 1;
267
268    let mut directed = false;
269    let mut node_ids: Vec<i64> = Vec::new();
270    let mut edges: Vec<(i64, i64)> = Vec::new();
271
272    // Parse inside graph block
273    while pos < tokens.len() && tokens[pos] != Token::Close {
274        match &tokens[pos] {
275            Token::Key(k) => {
276                let key = k.clone();
277                pos += 1;
278
279                match key.as_str() {
280                    "directed" => {
281                        if let Some(Token::Integer(v)) = tokens.get(pos) {
282                            directed = *v != 0;
283                            pos += 1;
284                        } else {
285                            pos = skip_value(tokens, pos);
286                        }
287                    }
288                    "node" if pos < tokens.len() && tokens[pos] == Token::Open => {
289                        pos += 1;
290                        let (node_id, new_pos) = parse_node(tokens, pos)?;
291                        node_ids.push(node_id);
292                        pos = new_pos;
293                    }
294                    "edge" if pos < tokens.len() && tokens[pos] == Token::Open => {
295                        pos += 1;
296                        let (edge, new_pos) = parse_edge(tokens, pos)?;
297                        edges.push(edge);
298                        pos = new_pos;
299                    }
300                    _ => {
301                        pos = skip_value(tokens, pos);
302                    }
303                }
304            }
305            _ => {
306                return Err(IgraphError::Parse {
307                    line: 0,
308                    message: format!("unexpected token in graph block: {:?}", tokens[pos]),
309                });
310            }
311        }
312    }
313
314    // Skip closing "]"
315    if pos < tokens.len() && tokens[pos] == Token::Close {
316        // consumed
317    }
318
319    // Build the graph
320    build_graph(directed, &node_ids, &edges)
321}
322
323fn parse_node(tokens: &[Token], mut pos: usize) -> IgraphResult<(i64, usize)> {
324    let mut node_id: Option<i64> = None;
325
326    while pos < tokens.len() && tokens[pos] != Token::Close {
327        match &tokens[pos] {
328            Token::Key(k) => {
329                let key = k.clone();
330                pos += 1;
331                if key == "id" {
332                    if let Some(Token::Integer(v)) = tokens.get(pos) {
333                        node_id = Some(*v);
334                        pos += 1;
335                    } else {
336                        pos = skip_value(tokens, pos);
337                    }
338                } else {
339                    pos = skip_value(tokens, pos);
340                }
341            }
342            _ => {
343                return Err(IgraphError::Parse {
344                    line: 0,
345                    message: format!("unexpected token in node block: {:?}", tokens[pos]),
346                });
347            }
348        }
349    }
350
351    // Skip closing "]"
352    if pos < tokens.len() && tokens[pos] == Token::Close {
353        pos += 1;
354    }
355
356    let id = node_id.ok_or_else(|| IgraphError::Parse {
357        line: 0,
358        message: "node without 'id' field".into(),
359    })?;
360
361    Ok((id, pos))
362}
363
364fn parse_edge(tokens: &[Token], mut pos: usize) -> IgraphResult<((i64, i64), usize)> {
365    let mut source: Option<i64> = None;
366    let mut target: Option<i64> = None;
367
368    while pos < tokens.len() && tokens[pos] != Token::Close {
369        match &tokens[pos] {
370            Token::Key(k) => {
371                let key = k.clone();
372                pos += 1;
373                match key.as_str() {
374                    "source" => {
375                        if let Some(Token::Integer(v)) = tokens.get(pos) {
376                            source = Some(*v);
377                            pos += 1;
378                        } else {
379                            pos = skip_value(tokens, pos);
380                        }
381                    }
382                    "target" => {
383                        if let Some(Token::Integer(v)) = tokens.get(pos) {
384                            target = Some(*v);
385                            pos += 1;
386                        } else {
387                            pos = skip_value(tokens, pos);
388                        }
389                    }
390                    _ => {
391                        pos = skip_value(tokens, pos);
392                    }
393                }
394            }
395            _ => {
396                return Err(IgraphError::Parse {
397                    line: 0,
398                    message: format!("unexpected token in edge block: {:?}", tokens[pos]),
399                });
400            }
401        }
402    }
403
404    // Skip closing "]"
405    if pos < tokens.len() && tokens[pos] == Token::Close {
406        pos += 1;
407    }
408
409    let src = source.ok_or_else(|| IgraphError::Parse {
410        line: 0,
411        message: "edge without 'source' field".into(),
412    })?;
413    let tgt = target.ok_or_else(|| IgraphError::Parse {
414        line: 0,
415        message: "edge without 'target' field".into(),
416    })?;
417
418    Ok(((src, tgt), pos))
419}
420
421fn skip_value(tokens: &[Token], mut pos: usize) -> usize {
422    if pos >= tokens.len() {
423        return pos;
424    }
425    match &tokens[pos] {
426        Token::Integer(_) | Token::Float(_) | Token::Str(_) | Token::Key(_) => pos + 1,
427        Token::Open => {
428            pos += 1;
429            let mut depth: u32 = 1;
430            while pos < tokens.len() && depth > 0 {
431                match &tokens[pos] {
432                    Token::Open => depth = depth.saturating_add(1),
433                    Token::Close => depth = depth.saturating_sub(1),
434                    _ => {}
435                }
436                pos += 1;
437            }
438            pos
439        }
440        Token::Close => pos,
441    }
442}
443
444fn build_graph(directed: bool, node_ids: &[i64], edges: &[(i64, i64)]) -> IgraphResult<Graph> {
445    use std::collections::HashMap;
446
447    // Map external GML ids to internal 0-based indices
448    let mut id_map: HashMap<i64, u32> = HashMap::with_capacity(node_ids.len());
449    for (idx, &gml_id) in node_ids.iter().enumerate() {
450        #[allow(clippy::cast_possible_truncation)]
451        let internal_id = idx as u32;
452        if id_map.insert(gml_id, internal_id).is_some() {
453            return Err(IgraphError::Parse {
454                line: 0,
455                message: format!("duplicate node id {gml_id}"),
456            });
457        }
458    }
459
460    #[allow(clippy::cast_possible_truncation)]
461    let n = node_ids.len() as u32;
462    let mut graph = Graph::new(n, directed)?;
463
464    let mut edge_list: Vec<(u32, u32)> = Vec::with_capacity(edges.len());
465    for &(src_gml, tgt_gml) in edges {
466        let src = id_map.get(&src_gml).ok_or_else(|| IgraphError::Parse {
467            line: 0,
468            message: format!("edge references unknown node id {src_gml}"),
469        })?;
470        let tgt = id_map.get(&tgt_gml).ok_or_else(|| IgraphError::Parse {
471            line: 0,
472            message: format!("edge references unknown node id {tgt_gml}"),
473        })?;
474        edge_list.push((*src, *tgt));
475    }
476
477    graph.add_edges(edge_list)?;
478    Ok(graph)
479}
480
481#[cfg(test)]
482mod tests {
483    use super::*;
484
485    #[test]
486    fn test_empty_graph() {
487        let gml = b"graph [ ]";
488        let g = read_gml(&gml[..]).unwrap();
489        assert_eq!(g.vcount(), 0);
490        assert_eq!(g.ecount(), 0);
491        assert!(!g.is_directed());
492    }
493
494    #[test]
495    fn test_directed_flag() {
496        let gml = b"graph [ directed 1 node [ id 0 ] node [ id 1 ] edge [ source 0 target 1 ] ]";
497        let g = read_gml(&gml[..]).unwrap();
498        assert!(g.is_directed());
499        assert_eq!(g.vcount(), 2);
500        assert_eq!(g.ecount(), 1);
501    }
502
503    #[test]
504    fn test_undirected_default() {
505        let gml = b"graph [ node [ id 0 ] node [ id 1 ] edge [ source 0 target 1 ] ]";
506        let g = read_gml(&gml[..]).unwrap();
507        assert!(!g.is_directed());
508    }
509
510    #[test]
511    fn test_non_contiguous_ids() {
512        let gml = b"graph [\n  node [ id 10 ]\n  node [ id 20 ]\n  node [ id 30 ]\n  edge [ source 10 target 30 ]\n]";
513        let g = read_gml(&gml[..]).unwrap();
514        assert_eq!(g.vcount(), 3);
515        assert_eq!(g.ecount(), 1);
516        // node 10 -> internal 0, node 30 -> internal 2
517        let (src, tgt) = g.edge(0).unwrap();
518        assert_eq!(src, 0);
519        assert_eq!(tgt, 2);
520    }
521
522    #[test]
523    fn test_multiline() {
524        let gml = b"Creator \"test\"\ngraph\n[\n  directed 0\n  node\n  [\n    id 0\n    label \"A\"\n  ]\n  node\n  [\n    id 1\n    label \"B\"\n  ]\n  edge\n  [\n    source 0\n    target 1\n    weight 1.5\n  ]\n]\n";
525        let g = read_gml(&gml[..]).unwrap();
526        assert_eq!(g.vcount(), 2);
527        assert_eq!(g.ecount(), 1);
528    }
529
530    #[test]
531    fn test_self_loop() {
532        let gml = b"graph [ node [ id 0 ] edge [ source 0 target 0 ] ]";
533        let g = read_gml(&gml[..]).unwrap();
534        assert_eq!(g.ecount(), 1);
535        let (s, t) = g.edge(0).unwrap();
536        assert_eq!(s, t);
537    }
538
539    #[test]
540    fn test_multiple_edges() {
541        let gml = b"graph [ node [ id 0 ] node [ id 1 ] edge [ source 0 target 1 ] edge [ source 0 target 1 ] ]";
542        let g = read_gml(&gml[..]).unwrap();
543        assert_eq!(g.ecount(), 2);
544    }
545
546    #[test]
547    fn test_error_missing_graph() {
548        let gml = b"node [ id 0 ]";
549        let result = read_gml(&gml[..]);
550        assert!(result.is_err());
551    }
552
553    #[test]
554    fn test_error_duplicate_node_id() {
555        let gml = b"graph [ node [ id 0 ] node [ id 0 ] ]";
556        let result = read_gml(&gml[..]);
557        assert!(result.is_err());
558    }
559
560    #[test]
561    fn test_error_unknown_edge_target() {
562        let gml = b"graph [ node [ id 0 ] edge [ source 0 target 99 ] ]";
563        let result = read_gml(&gml[..]);
564        assert!(result.is_err());
565    }
566
567    #[test]
568    fn test_error_node_without_id() {
569        let gml = b"graph [ node [ label \"x\" ] ]";
570        let result = read_gml(&gml[..]);
571        assert!(result.is_err());
572    }
573
574    #[test]
575    fn test_error_edge_without_source() {
576        let gml = b"graph [ node [ id 0 ] node [ id 1 ] edge [ target 1 ] ]";
577        let result = read_gml(&gml[..]);
578        assert!(result.is_err());
579    }
580
581    #[test]
582    fn test_write_read_round_trip_undirected() {
583        let mut g = Graph::with_vertices(4);
584        g.add_edge(0, 1).unwrap();
585        g.add_edge(1, 2).unwrap();
586        g.add_edge(2, 3).unwrap();
587        g.add_edge(3, 0).unwrap();
588
589        let mut buf = Vec::new();
590        write_gml(&g, &mut buf).unwrap();
591
592        let g2 = read_gml(&buf[..]).unwrap();
593        assert_eq!(g2.vcount(), 4);
594        assert_eq!(g2.ecount(), 4);
595        assert!(!g2.is_directed());
596    }
597
598    #[test]
599    fn test_write_read_round_trip_directed() {
600        let mut g = Graph::new(3, true).unwrap();
601        g.add_edge(0, 1).unwrap();
602        g.add_edge(1, 2).unwrap();
603        g.add_edge(2, 0).unwrap();
604
605        let mut buf = Vec::new();
606        write_gml(&g, &mut buf).unwrap();
607
608        let g2 = read_gml(&buf[..]).unwrap();
609        assert_eq!(g2.vcount(), 3);
610        assert_eq!(g2.ecount(), 3);
611        assert!(g2.is_directed());
612
613        for eid in 0..3u32 {
614            let (s1, t1) = g.edge(eid).unwrap();
615            let (s2, t2) = g2.edge(eid).unwrap();
616            assert_eq!(s1, s2);
617            assert_eq!(t1, t2);
618        }
619    }
620
621    #[test]
622    fn test_write_empty() {
623        let g = Graph::with_vertices(0);
624        let mut buf = Vec::new();
625        write_gml(&g, &mut buf).unwrap();
626        let s = String::from_utf8(buf).unwrap();
627        assert!(s.contains("graph"));
628        assert!(s.contains("directed 0"));
629    }
630
631    #[test]
632    fn test_string_with_entities() {
633        let gml = b"graph [ node [ id 0 label \"a&amp;b\" ] ]";
634        let g = read_gml(&gml[..]).unwrap();
635        assert_eq!(g.vcount(), 1);
636    }
637
638    #[test]
639    fn test_top_level_creator_ignored() {
640        let gml = b"Creator \"Someone\"\nVersion 1\ngraph [ node [ id 0 ] ]";
641        let g = read_gml(&gml[..]).unwrap();
642        assert_eq!(g.vcount(), 1);
643    }
644
645    #[test]
646    fn test_negative_node_id() {
647        let gml = b"graph [ node [ id -5 ] node [ id -3 ] edge [ source -5 target -3 ] ]";
648        let g = read_gml(&gml[..]).unwrap();
649        assert_eq!(g.vcount(), 2);
650        assert_eq!(g.ecount(), 1);
651    }
652
653    #[test]
654    fn test_lesmis_style_header() {
655        let gml = b"Creator \"Mark Newman on Fri Jul 21 12:44:53 2006\"\ngraph\n[\n  node\n  [\n    id 0\n    label \"Myriel\"\n  ]\n  node\n  [\n    id 1\n    label \"Napoleon\"\n  ]\n  edge\n  [\n    source 0\n    target 1\n    value 1\n  ]\n]\n";
656        let g = read_gml(&gml[..]).unwrap();
657        assert_eq!(g.vcount(), 2);
658        assert_eq!(g.ecount(), 1);
659    }
660}