aprender_tsp/instance/
tsplib.rs

1//! TSPLIB format parser.
2//!
3//! Reference: Reinelt (1991) "TSPLIB—A Traveling Salesman Problem Library"
4
5use crate::error::{TspError, TspResult};
6use crate::instance::{EdgeWeightType, TspInstance};
7use std::path::Path;
8
9/// Parser for TSPLIB format files
10#[derive(Debug)]
11pub struct TsplibParser;
12
13impl TsplibParser {
14    /// Parse a TSPLIB file
15    pub fn parse_file(path: &Path) -> TspResult<TspInstance> {
16        let content = std::fs::read_to_string(path)?;
17        Self::parse(&content, path)
18    }
19
20    /// Parse TSPLIB content
21    #[allow(clippy::too_many_lines)]
22    pub fn parse(content: &str, path: &Path) -> TspResult<TspInstance> {
23        let mut name = String::new();
24        let mut comment = None;
25        let mut dimension = 0;
26        let mut edge_weight_type = EdgeWeightType::Euc2d;
27        let mut coords: Vec<(f64, f64)> = Vec::new();
28        let mut in_node_coord_section = false;
29        let mut in_edge_weight_section = false;
30        let mut edge_weights: Vec<f64> = Vec::new();
31        let mut best_known: Option<f64> = None;
32
33        for (line_num, line) in content.lines().enumerate() {
34            let line = line.trim();
35
36            if line.is_empty() || line == "EOF" {
37                continue;
38            }
39
40            // Check for section headers
41            if line == "NODE_COORD_SECTION" {
42                in_node_coord_section = true;
43                in_edge_weight_section = false;
44                continue;
45            }
46            if line == "EDGE_WEIGHT_SECTION" {
47                in_edge_weight_section = true;
48                in_node_coord_section = false;
49                continue;
50            }
51            if line == "DISPLAY_DATA_SECTION" {
52                in_node_coord_section = false;
53                in_edge_weight_section = false;
54                continue;
55            }
56
57            // Parse node coordinates
58            if in_node_coord_section {
59                let parts: Vec<&str> = line.split_whitespace().collect();
60                if parts.len() >= 3 {
61                    let x: f64 = parts[1].parse().map_err(|_| TspError::ParseError {
62                        file: path.to_path_buf(),
63                        line: Some(line_num + 1),
64                        cause: format!("Invalid x coordinate: {}", parts[1]),
65                    })?;
66                    let y: f64 = parts[2].parse().map_err(|_| TspError::ParseError {
67                        file: path.to_path_buf(),
68                        line: Some(line_num + 1),
69                        cause: format!("Invalid y coordinate: {}", parts[2]),
70                    })?;
71                    coords.push((x, y));
72                }
73                continue;
74            }
75
76            // Parse edge weights
77            if in_edge_weight_section {
78                for part in line.split_whitespace() {
79                    let weight: f64 = part.parse().map_err(|_| TspError::ParseError {
80                        file: path.to_path_buf(),
81                        line: Some(line_num + 1),
82                        cause: format!("Invalid edge weight: {part}"),
83                    })?;
84                    edge_weights.push(weight);
85                }
86                continue;
87            }
88
89            // Parse header fields
90            if let Some((key, value)) = line.split_once(':') {
91                let key = key.trim().to_uppercase();
92                let value = value.trim();
93
94                match key.as_str() {
95                    "NAME" => name = value.to_string(),
96                    "COMMENT" => {
97                        comment = Some(value.to_string());
98                        // Try to extract optimal tour value from comment
99                        // Common patterns: "Optimal tour: 7542", "Optimal: 7542", "Best known: 7542"
100                        if let Some(opt) = Self::extract_optimal_from_comment(value) {
101                            best_known = Some(opt);
102                        }
103                    }
104                    "BEST_KNOWN" | "OPTIMAL" => {
105                        // Some files have explicit BEST_KNOWN or OPTIMAL field
106                        if let Ok(opt) = value.parse::<f64>() {
107                            best_known = Some(opt);
108                        }
109                    }
110                    "DIMENSION" => {
111                        dimension = value.parse().map_err(|_| TspError::ParseError {
112                            file: path.to_path_buf(),
113                            line: Some(line_num + 1),
114                            cause: format!("Invalid dimension: {value}"),
115                        })?;
116                    }
117                    "EDGE_WEIGHT_TYPE" => {
118                        edge_weight_type = Self::parse_edge_weight_type(value, path, line_num)?;
119                    }
120                    // Unknown or informational fields - ignore
121                    _ => {}
122                }
123            }
124        }
125
126        // Validate parsed data
127        if dimension == 0 {
128            return Err(TspError::ParseError {
129                file: path.to_path_buf(),
130                line: None,
131                cause: "Missing DIMENSION field".into(),
132            });
133        }
134
135        // Build distance matrix
136        let distances = if !edge_weights.is_empty() {
137            Self::build_matrix_from_weights(&edge_weights, dimension, path)?
138        } else if !coords.is_empty() {
139            Self::compute_distance_matrix(&coords, edge_weight_type)
140        } else {
141            return Err(TspError::ParseError {
142                file: path.to_path_buf(),
143                line: None,
144                cause: "No coordinates or edge weights found".into(),
145            });
146        };
147
148        if name.is_empty() {
149            name = path
150                .file_stem()
151                .and_then(|s| s.to_str())
152                .unwrap_or("unnamed")
153                .to_string();
154        }
155
156        Ok(TspInstance {
157            name,
158            dimension,
159            comment,
160            edge_weight_type,
161            coords: if coords.is_empty() {
162                None
163            } else {
164                Some(coords)
165            },
166            distances,
167            best_known,
168        })
169    }
170
171    fn parse_edge_weight_type(
172        value: &str,
173        path: &Path,
174        line_num: usize,
175    ) -> TspResult<EdgeWeightType> {
176        match value.to_uppercase().as_str() {
177            "EUC_2D" => Ok(EdgeWeightType::Euc2d),
178            "EUC_3D" => Ok(EdgeWeightType::Euc3d),
179            "CEIL_2D" => Ok(EdgeWeightType::Ceil2d),
180            "GEO" => Ok(EdgeWeightType::Geo),
181            "ATT" => Ok(EdgeWeightType::Att),
182            "EXPLICIT" => Ok(EdgeWeightType::Explicit),
183            _ => Err(TspError::ParseError {
184                file: path.to_path_buf(),
185                line: Some(line_num + 1),
186                cause: format!("Unsupported edge weight type: {value}"),
187            }),
188        }
189    }
190
191    fn compute_distance_matrix(coords: &[(f64, f64)], edge_type: EdgeWeightType) -> Vec<Vec<f64>> {
192        let n = coords.len();
193        let mut matrix = vec![vec![0.0; n]; n];
194
195        for i in 0..n {
196            for j in i + 1..n {
197                let dist = match edge_type {
198                    EdgeWeightType::Euc2d => {
199                        let dx = coords[i].0 - coords[j].0;
200                        let dy = coords[i].1 - coords[j].1;
201                        (dx * dx + dy * dy).sqrt()
202                    }
203                    EdgeWeightType::Ceil2d => {
204                        let dx = coords[i].0 - coords[j].0;
205                        let dy = coords[i].1 - coords[j].1;
206                        (dx * dx + dy * dy).sqrt().ceil()
207                    }
208                    EdgeWeightType::Att => {
209                        // ATT distance (pseudo-Euclidean)
210                        // TSPLIB formula: rij = sqrt((xd*xd + yd*yd) / 10.0)
211                        // Note: division by 10 is INSIDE sqrt, not after
212                        let dx = coords[i].0 - coords[j].0;
213                        let dy = coords[i].1 - coords[j].1;
214                        let r = ((dx * dx + dy * dy) / 10.0).sqrt();
215                        let t = r.round();
216                        if t < r {
217                            t + 1.0
218                        } else {
219                            t
220                        }
221                    }
222                    EdgeWeightType::Geo => {
223                        // Geographic distance
224                        Self::geo_distance(coords[i], coords[j])
225                    }
226                    _ => {
227                        // Default to Euclidean
228                        let dx = coords[i].0 - coords[j].0;
229                        let dy = coords[i].1 - coords[j].1;
230                        (dx * dx + dy * dy).sqrt()
231                    }
232                };
233                matrix[i][j] = dist;
234                matrix[j][i] = dist;
235            }
236        }
237
238        matrix
239    }
240
241    fn geo_distance(c1: (f64, f64), c2: (f64, f64)) -> f64 {
242        const PI: f64 = std::f64::consts::PI;
243        const RRR: f64 = 6378.388; // Earth radius in km
244
245        let deg_to_rad = |deg: f64| -> f64 {
246            let deg_int = deg.trunc();
247            let min = deg - deg_int;
248            PI * (deg_int + 5.0 * min / 3.0) / 180.0
249        };
250
251        let lat1 = deg_to_rad(c1.0);
252        let lon1 = deg_to_rad(c1.1);
253        let lat2 = deg_to_rad(c2.0);
254        let lon2 = deg_to_rad(c2.1);
255
256        let q1 = (lon1 - lon2).cos();
257        let q2 = (lat1 - lat2).cos();
258        let q3 = (lat1 + lat2).cos();
259
260        let dij = RRR * (0.5 * ((1.0 + q1) * q2 - (1.0 - q1) * q3)).acos() + 1.0;
261        dij.floor()
262    }
263
264    /// Extract optimal tour length from COMMENT field.
265    ///
266    /// Recognizes patterns like:
267    /// - "Optimal tour: 7542"
268    /// - "Optimal: 7542"
269    /// - "Best known: 426"
270    /// - "optimal solution: 10628"
271    /// - "Length = 7542"
272    fn extract_optimal_from_comment(comment: &str) -> Option<f64> {
273        let comment_lower = comment.to_lowercase();
274
275        // List of patterns to try
276        let patterns = [
277            "optimal tour:",
278            "optimal:",
279            "best known:",
280            "optimal solution:",
281            "best:",
282            "length =",
283            "length:",
284            "tour length:",
285        ];
286
287        for pattern in patterns {
288            if let Some(pos) = comment_lower.find(pattern) {
289                let after_pattern = &comment[pos + pattern.len()..];
290                // Extract the number following the pattern
291                let num_str: String = after_pattern
292                    .chars()
293                    .skip_while(|c| c.is_whitespace())
294                    .take_while(|c| c.is_ascii_digit() || *c == '.' || *c == ',')
295                    .filter(|c| *c != ',') // Remove thousand separators
296                    .collect();
297
298                if let Ok(val) = num_str.parse::<f64>() {
299                    return Some(val);
300                }
301            }
302        }
303
304        // Also try to find a standalone number at the end like "(7542)"
305        if let Some(start) = comment.rfind('(') {
306            if let Some(end) = comment.rfind(')') {
307                if start < end {
308                    let num_str: String = comment[start + 1..end]
309                        .chars()
310                        .filter(|c| c.is_ascii_digit() || *c == '.')
311                        .collect();
312                    if let Ok(val) = num_str.parse::<f64>() {
313                        return Some(val);
314                    }
315                }
316            }
317        }
318
319        None
320    }
321
322    fn build_matrix_from_weights(
323        weights: &[f64],
324        dimension: usize,
325        path: &Path,
326    ) -> TspResult<Vec<Vec<f64>>> {
327        let expected = dimension * (dimension - 1) / 2;
328        if weights.len() < expected {
329            return Err(TspError::ParseError {
330                file: path.to_path_buf(),
331                line: None,
332                cause: format!(
333                    "Not enough edge weights: got {}, expected at least {} for dimension {}",
334                    weights.len(),
335                    expected,
336                    dimension
337                ),
338            });
339        }
340
341        let mut matrix = vec![vec![0.0; dimension]; dimension];
342        let mut weight_iter = weights.iter();
343
344        // Assume lower triangular format
345        // Use range loops here as we need indices for symmetric matrix assignment
346        #[allow(clippy::needless_range_loop)]
347        for i in 1..dimension {
348            for j in 0..i {
349                if let Some(&weight) = weight_iter.next() {
350                    matrix[i][j] = weight;
351                    matrix[j][i] = weight;
352                }
353            }
354        }
355
356        Ok(matrix)
357    }
358}
359
360#[cfg(test)]
361mod tests {
362    use super::*;
363    use std::path::PathBuf;
364
365    fn test_path() -> PathBuf {
366        PathBuf::from("test.tsp")
367    }
368
369    #[test]
370    fn test_parse_simple_tsplib() {
371        let content = r#"
372NAME: test
373TYPE: TSP
374COMMENT: A simple test
375DIMENSION: 3
376EDGE_WEIGHT_TYPE: EUC_2D
377NODE_COORD_SECTION
3781 0.0 0.0
3792 3.0 0.0
3803 3.0 4.0
381EOF
382"#;
383
384        let instance = TsplibParser::parse(content, &test_path()).expect("should parse");
385
386        assert_eq!(instance.name, "test");
387        assert_eq!(instance.dimension, 3);
388        assert_eq!(instance.comment, Some("A simple test".into()));
389        assert!(instance.coords.is_some());
390        assert_eq!(instance.coords.as_ref().map(|c| c.len()), Some(3));
391    }
392
393    #[test]
394    fn test_parse_computes_distances() {
395        let content = r#"
396NAME: triangle
397DIMENSION: 3
398EDGE_WEIGHT_TYPE: EUC_2D
399NODE_COORD_SECTION
4001 0.0 0.0
4012 3.0 0.0
4023 3.0 4.0
403EOF
404"#;
405
406        let instance = TsplibParser::parse(content, &test_path()).expect("should parse");
407
408        // 3-4-5 triangle
409        assert!((instance.distance(0, 1) - 3.0).abs() < 1e-10);
410        assert!((instance.distance(1, 2) - 4.0).abs() < 1e-10);
411        assert!((instance.distance(0, 2) - 5.0).abs() < 1e-10);
412    }
413
414    #[test]
415    fn test_parse_ceil_2d() {
416        let content = r#"
417NAME: ceil_test
418DIMENSION: 2
419EDGE_WEIGHT_TYPE: CEIL_2D
420NODE_COORD_SECTION
4211 0.0 0.0
4222 1.5 0.0
423EOF
424"#;
425
426        let instance = TsplibParser::parse(content, &test_path()).expect("should parse");
427
428        // Distance 1.5 should be ceiling'd to 2.0
429        assert!((instance.distance(0, 1) - 2.0).abs() < 1e-10);
430    }
431
432    #[test]
433    fn test_parse_missing_dimension() {
434        let content = r#"
435NAME: test
436NODE_COORD_SECTION
4371 0.0 0.0
438EOF
439"#;
440
441        let result = TsplibParser::parse(content, &test_path());
442        assert!(result.is_err());
443        let err = result.unwrap_err().to_string();
444        assert!(err.contains("DIMENSION"));
445    }
446
447    #[test]
448    fn test_parse_no_coords_or_weights() {
449        let content = r#"
450NAME: test
451DIMENSION: 3
452EDGE_WEIGHT_TYPE: EUC_2D
453EOF
454"#;
455
456        let result = TsplibParser::parse(content, &test_path());
457        assert!(result.is_err());
458    }
459
460    #[test]
461    fn test_parse_invalid_coordinate() {
462        let content = r#"
463NAME: test
464DIMENSION: 2
465EDGE_WEIGHT_TYPE: EUC_2D
466NODE_COORD_SECTION
4671 0.0 0.0
4682 abc 0.0
469EOF
470"#;
471
472        let result = TsplibParser::parse(content, &test_path());
473        assert!(result.is_err());
474        let err = result.unwrap_err().to_string();
475        assert!(err.contains("Invalid x coordinate"));
476    }
477
478    #[test]
479    fn test_parse_explicit_weights() {
480        let content = r#"
481NAME: explicit_test
482DIMENSION: 3
483EDGE_WEIGHT_TYPE: EXPLICIT
484EDGE_WEIGHT_SECTION
48510
48620 30
487EOF
488"#;
489
490        let instance = TsplibParser::parse(content, &test_path()).expect("should parse");
491
492        // Lower triangular: [1,0]=10, [2,0]=20, [2,1]=30
493        assert!((instance.distance(1, 0) - 10.0).abs() < 1e-10);
494        assert!((instance.distance(2, 0) - 20.0).abs() < 1e-10);
495        assert!((instance.distance(2, 1) - 30.0).abs() < 1e-10);
496    }
497
498    #[test]
499    fn test_parse_unsupported_edge_type() {
500        let content = r#"
501NAME: test
502DIMENSION: 3
503EDGE_WEIGHT_TYPE: UNKNOWN_TYPE
504NODE_COORD_SECTION
5051 0.0 0.0
506EOF
507"#;
508
509        let result = TsplibParser::parse(content, &test_path());
510        assert!(result.is_err());
511        let err = result.unwrap_err().to_string();
512        assert!(err.contains("Unsupported edge weight type"));
513    }
514
515    #[test]
516    fn test_att_distance() {
517        let content = r#"
518NAME: att_test
519DIMENSION: 2
520EDGE_WEIGHT_TYPE: ATT
521NODE_COORD_SECTION
5221 0.0 0.0
5232 100.0 0.0
524EOF
525"#;
526
527        let instance = TsplibParser::parse(content, &test_path()).expect("should parse");
528
529        // ATT distance should be different from Euclidean
530        let dist = instance.distance(0, 1);
531        assert!(dist > 0.0);
532    }
533
534    #[test]
535    fn test_att_distance_matches_tsplib() {
536        // Regression test: ATT formula must have division INSIDE sqrt
537        // TSPLIB formula: rij = sqrt((xd*xd + yd*yd) / 10.0)
538        // Using att48 city 1 (6734, 1453) and city 2 (2233, 10)
539        let content = r#"
540NAME: att48_subset
541DIMENSION: 2
542EDGE_WEIGHT_TYPE: ATT
543NODE_COORD_SECTION
5441 6734 1453
5452 2233 10
546EOF
547"#;
548
549        let instance = TsplibParser::parse(content, &test_path()).expect("should parse");
550
551        // Expected calculation:
552        // xd = 6734 - 2233 = 4501
553        // yd = 1453 - 10 = 1443
554        // r = sqrt((4501^2 + 1443^2) / 10) = sqrt(22341250 / 10) = sqrt(2234125) = 1494.69
555        // nint(1494.69) = 1495, and 1495 > 1494.69 so dij = 1495
556        let dist = instance.distance(0, 1);
557        assert!(
558            (dist - 1495.0).abs() < 1.0,
559            "ATT distance should be ~1495 (TSPLIB verified), got {}",
560            dist
561        );
562    }
563
564    #[test]
565    fn test_name_defaults_to_filename() {
566        let content = r#"
567DIMENSION: 2
568EDGE_WEIGHT_TYPE: EUC_2D
569NODE_COORD_SECTION
5701 0.0 0.0
5712 1.0 0.0
572EOF
573"#;
574
575        let instance =
576            TsplibParser::parse(content, &PathBuf::from("my_instance.tsp")).expect("should parse");
577        assert_eq!(instance.name, "my_instance");
578    }
579
580    #[test]
581    fn test_extract_optimal_from_comment_optimal_tour() {
582        let opt = TsplibParser::extract_optimal_from_comment("Optimal tour: 7542");
583        assert_eq!(opt, Some(7542.0));
584    }
585
586    #[test]
587    fn test_extract_optimal_from_comment_best_known() {
588        let opt = TsplibParser::extract_optimal_from_comment("Best known: 426");
589        assert_eq!(opt, Some(426.0));
590    }
591
592    #[test]
593    fn test_extract_optimal_from_comment_parentheses() {
594        let opt = TsplibParser::extract_optimal_from_comment("52 locations in Berlin (7542)");
595        assert_eq!(opt, Some(7542.0));
596    }
597
598    #[test]
599    fn test_extract_optimal_from_comment_with_thousands() {
600        let opt = TsplibParser::extract_optimal_from_comment("Optimal: 10,628");
601        assert_eq!(opt, Some(10628.0));
602    }
603
604    #[test]
605    fn test_extract_optimal_from_comment_no_match() {
606        let opt = TsplibParser::extract_optimal_from_comment("Just a plain comment");
607        assert_eq!(opt, None);
608    }
609
610    #[test]
611    fn test_parse_with_optimal_in_comment() {
612        let content = r#"
613NAME: test_optimal
614TYPE: TSP
615COMMENT: Optimal tour: 100
616DIMENSION: 3
617EDGE_WEIGHT_TYPE: EUC_2D
618NODE_COORD_SECTION
6191 0.0 0.0
6202 3.0 0.0
6213 3.0 4.0
622EOF
623"#;
624
625        let instance = TsplibParser::parse(content, &test_path()).expect("should parse");
626        assert_eq!(instance.best_known, Some(100.0));
627    }
628
629    #[test]
630    fn test_parse_with_explicit_best_known_field() {
631        let content = r#"
632NAME: test_explicit
633TYPE: TSP
634BEST_KNOWN: 7542
635DIMENSION: 3
636EDGE_WEIGHT_TYPE: EUC_2D
637NODE_COORD_SECTION
6381 0.0 0.0
6392 3.0 0.0
6403 3.0 4.0
641EOF
642"#;
643
644        let instance = TsplibParser::parse(content, &test_path()).expect("should parse");
645        assert_eq!(instance.best_known, Some(7542.0));
646    }
647}