Skip to main content

rust_igraph/algorithms/io/
dl.rs

1//! UCINET DL format reader (ALGO-IO-009).
2//!
3//! Reads graphs in the DL format used by UCINET. This is a read-only
4//! text format supporting three data representations:
5//!
6//! - **fullmatrix**: adjacency matrix of 0/1 values
7//! - **edgelist1**: pairs of 1-based vertex IDs with optional weights
8//! - **nodelist1**: source vertex followed by its neighbors (1-based)
9//!
10//! ```text
11//! DL n=5
12//! format = edgelist1
13//! data:
14//! 1 2
15//! 1 3
16//! 2 3
17//! ```
18//!
19//! Vertex labels can be provided via `labels:` or `labels embedded:`.
20//!
21//! Counterpart of `igraph_read_graph_dl`.
22
23use std::io::{BufRead, BufReader, Read};
24
25use crate::core::{Graph, IgraphError, IgraphResult};
26
27/// Result of reading a DL file.
28#[derive(Debug, Clone)]
29pub struct DlGraph {
30    /// The parsed graph.
31    pub graph: Graph,
32    /// Vertex labels (if provided).
33    pub labels: Option<Vec<String>>,
34    /// Edge weights (if provided, only for edgelist1 format).
35    pub weights: Option<Vec<f64>>,
36}
37
38#[derive(Debug, Clone, Copy, PartialEq)]
39enum DlFormat {
40    FullMatrix,
41    EdgeList1,
42    NodeList1,
43}
44
45/// Read a graph from UCINET DL format.
46///
47/// Supports fullmatrix, edgelist1, and nodelist1 data formats.
48/// Case-insensitive keywords. Vertex IDs in edgelist1 and nodelist1
49/// are 1-based.
50///
51/// # Examples
52///
53/// ```
54/// use rust_igraph::read_dl;
55///
56/// let input = b"DL n=3\nformat = edgelist1\ndata:\n1 2\n2 3\n1 3\n";
57/// let result = read_dl(&input[..], true).unwrap();
58/// assert_eq!(result.graph.vcount(), 3);
59/// assert_eq!(result.graph.ecount(), 3);
60/// ```
61#[allow(clippy::too_many_lines)]
62pub fn read_dl<R: Read>(input: R, directed: bool) -> IgraphResult<DlGraph> {
63    let reader = BufReader::new(input);
64    let mut lines: Vec<String> = Vec::new();
65
66    for line_result in reader.lines() {
67        let line = line_result?;
68        lines.push(line);
69    }
70
71    let mut pos = 0;
72    let mut n_vertices: Option<u32> = None;
73    let mut format = DlFormat::FullMatrix;
74    let mut labels: Vec<String> = Vec::new();
75    let mut labels_embedded = false;
76
77    // Parse header: DL n=N
78    pos = skip_empty(&lines, pos);
79    if pos >= lines.len() {
80        return Err(parse_err(0, "empty DL file"));
81    }
82
83    let header_line = lines[pos].trim().to_ascii_lowercase();
84    if !header_line.starts_with("dl") {
85        return Err(parse_err(pos + 1, "DL file must start with 'DL'"));
86    }
87
88    // Extract n= from header line or subsequent lines
89    let header_rest = &lines[pos].trim()[2..];
90    if let Some(n) = extract_n(header_rest) {
91        n_vertices = Some(n);
92    }
93    pos += 1;
94
95    // Parse directives until DATA:
96    loop {
97        pos = skip_empty(&lines, pos);
98        if pos >= lines.len() {
99            break;
100        }
101
102        let trimmed = lines[pos].trim();
103        let lower = trimmed.to_ascii_lowercase();
104
105        if lower.starts_with("data:") || lower == "data" {
106            pos += 1;
107            break;
108        }
109
110        if lower.starts_with("n=") || lower.starts_with("n =") {
111            if n_vertices.is_none() {
112                let val =
113                    extract_n(trimmed).ok_or_else(|| parse_err(pos + 1, "invalid n= value"))?;
114                n_vertices = Some(val);
115            }
116            pos += 1;
117            continue;
118        }
119
120        if lower.starts_with("format") {
121            if lower.contains("fullmatrix") || lower.contains("full matrix") {
122                format = DlFormat::FullMatrix;
123            } else if lower.contains("edgelist1")
124                || lower.contains("edge list1")
125                || lower.contains("edgelist 1")
126            {
127                format = DlFormat::EdgeList1;
128            } else if lower.contains("nodelist1")
129                || lower.contains("node list1")
130                || lower.contains("nodelist 1")
131            {
132                format = DlFormat::NodeList1;
133            }
134            pos += 1;
135            continue;
136        }
137
138        if lower.starts_with("labels embedded") {
139            labels_embedded = true;
140            pos += 1;
141            continue;
142        }
143
144        if lower.starts_with("labels:") || lower == "labels" {
145            pos += 1;
146            // Read label lines until next keyword or data:
147            while pos < lines.len() {
148                let lbl_line = lines[pos].trim();
149                let lbl_lower = lbl_line.to_ascii_lowercase();
150                if lbl_lower.starts_with("data")
151                    || lbl_lower.starts_with("format")
152                    || lbl_lower.starts_with("labels")
153                {
154                    break;
155                }
156                if !lbl_line.is_empty() {
157                    // Labels can be comma or whitespace separated on one line
158                    for token in split_labels(lbl_line) {
159                        labels.push(token);
160                    }
161                }
162                pos += 1;
163            }
164            continue;
165        }
166
167        // Try to extract n= from lines like "N = 5"
168        if let Some(n) = extract_n(trimmed) {
169            if n_vertices.is_none() {
170                n_vertices = Some(n);
171            }
172            pos += 1;
173            continue;
174        }
175
176        pos += 1;
177    }
178
179    let n = n_vertices.ok_or_else(|| parse_err(0, "no vertex count (n=) found in DL file"))?;
180
181    let mut edges: Vec<(u32, u32)> = Vec::new();
182    let mut weights: Vec<f64> = Vec::new();
183    let mut has_weights = false;
184
185    match format {
186        DlFormat::FullMatrix => {
187            if labels_embedded {
188                // First row is label row, then labeled rows
189                pos = skip_empty(&lines, pos);
190                if pos < lines.len() {
191                    let header_labels = split_labels(lines[pos].trim());
192                    if labels.is_empty() {
193                        labels = header_labels;
194                    }
195                    pos += 1;
196                }
197                let mut row = 0u32;
198                while pos < lines.len() && row < n {
199                    let trimmed = lines[pos].trim();
200                    if trimmed.is_empty() {
201                        pos += 1;
202                        continue;
203                    }
204                    let tokens: Vec<&str> = trimmed.split_whitespace().collect();
205                    if tokens.is_empty() {
206                        pos += 1;
207                        continue;
208                    }
209                    // First token is the row label (skip), rest are 0/1
210                    let data_start = 1;
211                    for (col, &val) in tokens.iter().skip(data_start).enumerate() {
212                        if val == "1" {
213                            #[allow(clippy::cast_possible_truncation)]
214                            edges.push((row, col as u32));
215                        }
216                    }
217                    row += 1;
218                    pos += 1;
219                }
220            } else {
221                let mut row = 0u32;
222                while pos < lines.len() && row < n {
223                    let trimmed = lines[pos].trim();
224                    if trimmed.is_empty() {
225                        pos += 1;
226                        continue;
227                    }
228                    for (col, ch) in trimmed.split_whitespace().enumerate() {
229                        if ch == "1" {
230                            #[allow(clippy::cast_possible_truncation)]
231                            edges.push((row, col as u32));
232                        }
233                    }
234                    row += 1;
235                    pos += 1;
236                }
237            }
238        }
239        DlFormat::EdgeList1 => {
240            if labels_embedded {
241                while pos < lines.len() {
242                    let trimmed = lines[pos].trim();
243                    if trimmed.is_empty() {
244                        pos += 1;
245                        continue;
246                    }
247                    let tokens: Vec<&str> = trimmed.split_whitespace().collect();
248                    if tokens.len() < 2 {
249                        pos += 1;
250                        continue;
251                    }
252                    let from_label = tokens[0].to_string();
253                    let to_label = tokens[1].to_string();
254                    let from_id = get_or_add_label(&mut labels, &from_label);
255                    let to_id = get_or_add_label(&mut labels, &to_label);
256                    edges.push((from_id, to_id));
257                    if tokens.len() >= 3 {
258                        if let Ok(w) = tokens[2].parse::<f64>() {
259                            has_weights = true;
260                            weights.push(w);
261                        } else {
262                            weights.push(0.0);
263                        }
264                    } else {
265                        weights.push(0.0);
266                    }
267                    pos += 1;
268                }
269            } else {
270                while pos < lines.len() {
271                    let trimmed = lines[pos].trim();
272                    if trimmed.is_empty() {
273                        pos += 1;
274                        continue;
275                    }
276                    let tokens: Vec<&str> = trimmed.split_whitespace().collect();
277                    if tokens.len() < 2 {
278                        pos += 1;
279                        continue;
280                    }
281                    let from: u32 = tokens[0]
282                        .parse()
283                        .map_err(|e| parse_err(pos + 1, &format!("invalid source id: {e}")))?;
284                    let to: u32 = tokens[1]
285                        .parse()
286                        .map_err(|e| parse_err(pos + 1, &format!("invalid target id: {e}")))?;
287                    if from == 0 || to == 0 || from > n || to > n {
288                        return Err(parse_err(
289                            pos + 1,
290                            &format!("vertex ID out of range: {from} {to} (n={n})"),
291                        ));
292                    }
293                    edges.push((from - 1, to - 1));
294                    if tokens.len() >= 3 {
295                        if let Ok(w) = tokens[2].parse::<f64>() {
296                            has_weights = true;
297                            weights.push(w);
298                        } else {
299                            weights.push(0.0);
300                        }
301                    } else {
302                        weights.push(0.0);
303                    }
304                    pos += 1;
305                }
306            }
307        }
308        DlFormat::NodeList1 => {
309            if labels_embedded {
310                while pos < lines.len() {
311                    let trimmed = lines[pos].trim();
312                    if trimmed.is_empty() {
313                        pos += 1;
314                        continue;
315                    }
316                    let tokens: Vec<&str> = trimmed.split_whitespace().collect();
317                    if tokens.is_empty() {
318                        pos += 1;
319                        continue;
320                    }
321                    let from_label = tokens[0].to_string();
322                    let from_id = get_or_add_label(&mut labels, &from_label);
323                    for &tok in &tokens[1..] {
324                        let to_label = tok.to_string();
325                        let to_id = get_or_add_label(&mut labels, &to_label);
326                        edges.push((from_id, to_id));
327                    }
328                    pos += 1;
329                }
330            } else {
331                while pos < lines.len() {
332                    let trimmed = lines[pos].trim();
333                    if trimmed.is_empty() {
334                        pos += 1;
335                        continue;
336                    }
337                    let tokens: Vec<&str> = trimmed.split_whitespace().collect();
338                    if tokens.is_empty() {
339                        pos += 1;
340                        continue;
341                    }
342                    let from: u32 = tokens[0]
343                        .parse()
344                        .map_err(|e| parse_err(pos + 1, &format!("invalid source id: {e}")))?;
345                    if from == 0 || from > n {
346                        return Err(parse_err(
347                            pos + 1,
348                            &format!("source vertex ID out of range: {from} (n={n})"),
349                        ));
350                    }
351                    for &tok in &tokens[1..] {
352                        let to: u32 = tok
353                            .parse()
354                            .map_err(|e| parse_err(pos + 1, &format!("invalid target id: {e}")))?;
355                        if to == 0 || to > n {
356                            return Err(parse_err(
357                                pos + 1,
358                                &format!("target vertex ID out of range: {to} (n={n})"),
359                            ));
360                        }
361                        edges.push((from - 1, to - 1));
362                    }
363                    pos += 1;
364                }
365            }
366        }
367    }
368
369    let mut graph = Graph::new(n, directed)?;
370    graph.add_edges(edges)?;
371
372    let final_labels = if labels.is_empty() {
373        None
374    } else {
375        Some(labels)
376    };
377    let final_weights = if has_weights { Some(weights) } else { None };
378
379    Ok(DlGraph {
380        graph,
381        labels: final_labels,
382        weights: final_weights,
383    })
384}
385
386fn parse_err(line: usize, msg: &str) -> IgraphError {
387    IgraphError::Parse {
388        line,
389        message: msg.to_string(),
390    }
391}
392
393fn skip_empty(lines: &[String], mut pos: usize) -> usize {
394    while pos < lines.len() && lines[pos].trim().is_empty() {
395        pos += 1;
396    }
397    pos
398}
399
400fn extract_n(s: &str) -> Option<u32> {
401    let lower = s.to_ascii_lowercase();
402    // Look for n=N or n = N pattern
403    for part in lower.split([',', ';']) {
404        let trimmed = part.trim();
405        if let Some(after_n) = trimmed.strip_prefix('n') {
406            let rest = after_n.trim();
407            if let Some(stripped) = rest.strip_prefix('=') {
408                if let Ok(val) = stripped.trim().parse::<u32>() {
409                    return Some(val);
410                }
411            }
412        }
413    }
414    None
415}
416
417fn split_labels(s: &str) -> Vec<String> {
418    if s.contains(',') {
419        s.split(',')
420            .map(|t| t.trim().trim_matches('"').to_string())
421            .filter(|t| !t.is_empty())
422            .collect()
423    } else {
424        s.split_whitespace()
425            .map(|t| t.trim_matches('"').to_string())
426            .collect()
427    }
428}
429
430fn get_or_add_label(labels: &mut Vec<String>, name: &str) -> u32 {
431    for (i, lbl) in labels.iter().enumerate() {
432        if lbl == name {
433            #[allow(clippy::cast_possible_truncation)]
434            return i as u32;
435        }
436    }
437    labels.push(name.to_string());
438    #[allow(clippy::cast_possible_truncation)]
439    let id = (labels.len() - 1) as u32;
440    id
441}
442
443#[cfg(test)]
444mod tests {
445    use super::*;
446
447    #[test]
448    fn test_edgelist1_basic() {
449        let input = b"DL n=3\nformat = edgelist1\ndata:\n1 2\n2 3\n1 3\n";
450        let result = read_dl(&input[..], true).unwrap();
451        assert_eq!(result.graph.vcount(), 3);
452        assert_eq!(result.graph.ecount(), 3);
453        assert!(result.graph.is_directed());
454    }
455
456    #[test]
457    fn test_edgelist1_undirected() {
458        let input = b"DL n=3\nformat = edgelist1\ndata:\n1 2\n2 3\n";
459        let result = read_dl(&input[..], false).unwrap();
460        assert_eq!(result.graph.vcount(), 3);
461        assert_eq!(result.graph.ecount(), 2);
462        assert!(!result.graph.is_directed());
463    }
464
465    #[test]
466    fn test_edgelist1_with_weights() {
467        let input = b"DL n=3\nformat = edgelist1\ndata:\n1 2 1.5\n2 3 2.5\n";
468        let result = read_dl(&input[..], true).unwrap();
469        let w = result.weights.unwrap();
470        assert!((w[0] - 1.5).abs() < 1e-10);
471        assert!((w[1] - 2.5).abs() < 1e-10);
472    }
473
474    #[test]
475    fn test_edgelist1_with_labels() {
476        let input = b"DL n=3\nformat = edgelist1\nlabels:\nA,B,C\ndata:\n1 2\n2 3\n";
477        let result = read_dl(&input[..], true).unwrap();
478        let labels = result.labels.unwrap();
479        assert_eq!(labels, vec!["A", "B", "C"]);
480    }
481
482    #[test]
483    fn test_edgelist1_labels_embedded() {
484        let input = b"DL n=3\nformat = edgelist1\nlabels embedded:\ndata:\nAlice Bob\nBob Carol\n";
485        let result = read_dl(&input[..], true).unwrap();
486        assert_eq!(result.graph.ecount(), 2);
487        let labels = result.labels.unwrap();
488        assert_eq!(labels[0], "Alice");
489        assert_eq!(labels[1], "Bob");
490        assert_eq!(labels[2], "Carol");
491    }
492
493    #[test]
494    fn test_fullmatrix_basic() {
495        let input = b"DL n=3\nformat = fullmatrix\ndata:\n0 1 0\n0 0 1\n1 0 0\n";
496        let result = read_dl(&input[..], true).unwrap();
497        assert_eq!(result.graph.vcount(), 3);
498        assert_eq!(result.graph.ecount(), 3);
499    }
500
501    #[test]
502    fn test_fullmatrix_default_format() {
503        // No format line = default fullmatrix
504        let input = b"DL n=3\ndata:\n0 1 1\n1 0 0\n0 1 0\n";
505        let result = read_dl(&input[..], true).unwrap();
506        assert_eq!(result.graph.vcount(), 3);
507        assert_eq!(result.graph.ecount(), 4);
508    }
509
510    #[test]
511    fn test_fullmatrix_labels_embedded() {
512        let input = b"DL n=3\nlabels embedded:\ndata:\nA B C\nA 0 1 0\nB 0 0 1\nC 1 0 0\n";
513        let result = read_dl(&input[..], true).unwrap();
514        assert_eq!(result.graph.vcount(), 3);
515        assert_eq!(result.graph.ecount(), 3);
516        let labels = result.labels.unwrap();
517        assert_eq!(labels, vec!["A", "B", "C"]);
518    }
519
520    #[test]
521    fn test_nodelist1_basic() {
522        let input = b"DL n=4\nformat = nodelist1\ndata:\n1 2 3\n2 3\n3 4\n";
523        let result = read_dl(&input[..], true).unwrap();
524        assert_eq!(result.graph.vcount(), 4);
525        assert_eq!(result.graph.ecount(), 4);
526    }
527
528    #[test]
529    fn test_nodelist1_labels_embedded() {
530        let input = b"DL n=3\nformat = nodelist1\nlabels embedded:\ndata:\nA B C\nB C\n";
531        let result = read_dl(&input[..], true).unwrap();
532        assert_eq!(result.graph.ecount(), 3);
533        let labels = result.labels.unwrap();
534        assert_eq!(labels[0], "A");
535    }
536
537    #[test]
538    fn test_case_insensitive() {
539        let input = b"dl N=2\nFORMAT = EDGELIST1\nDATA:\n1 2\n";
540        let result = read_dl(&input[..], true).unwrap();
541        assert_eq!(result.graph.vcount(), 2);
542        assert_eq!(result.graph.ecount(), 1);
543    }
544
545    #[test]
546    fn test_empty_graph() {
547        let input = b"DL n=5\nformat = edgelist1\ndata:\n";
548        let result = read_dl(&input[..], true).unwrap();
549        assert_eq!(result.graph.vcount(), 5);
550        assert_eq!(result.graph.ecount(), 0);
551    }
552
553    #[test]
554    fn test_no_dl_header_error() {
555        let input = b"n=3\ndata:\n1 2\n";
556        let result = read_dl(&input[..], true);
557        assert!(result.is_err());
558    }
559
560    #[test]
561    fn test_no_n_error() {
562        let input = b"DL\nformat = edgelist1\ndata:\n1 2\n";
563        let result = read_dl(&input[..], true);
564        assert!(result.is_err());
565    }
566
567    #[test]
568    fn test_vertex_id_out_of_range() {
569        let input = b"DL n=2\nformat = edgelist1\ndata:\n1 5\n";
570        let result = read_dl(&input[..], true);
571        assert!(result.is_err());
572    }
573
574    #[test]
575    fn test_zero_vertex_id_error() {
576        let input = b"DL n=2\nformat = edgelist1\ndata:\n0 1\n";
577        let result = read_dl(&input[..], true);
578        assert!(result.is_err());
579    }
580
581    #[test]
582    fn test_n_on_same_line() {
583        let input = b"DL n=4\nformat=edgelist1\ndata:\n1 2\n3 4\n";
584        let result = read_dl(&input[..], true).unwrap();
585        assert_eq!(result.graph.vcount(), 4);
586        assert_eq!(result.graph.ecount(), 2);
587    }
588
589    #[test]
590    fn test_labels_whitespace_separated() {
591        let input = b"DL n=3\nformat = edgelist1\nlabels:\nAlpha Beta Gamma\ndata:\n1 2\n";
592        let result = read_dl(&input[..], true).unwrap();
593        let labels = result.labels.unwrap();
594        assert_eq!(labels, vec!["Alpha", "Beta", "Gamma"]);
595    }
596}