1use crate::error::{TspError, TspResult};
6use crate::instance::{EdgeWeightType, TspInstance};
7use std::path::Path;
8
9#[derive(Debug)]
11pub struct TsplibParser;
12
13impl TsplibParser {
14 pub fn parse_file(path: &Path) -> TspResult<TspInstance> {
16 let content = std::fs::read_to_string(path)?;
17 Self::parse(&content, path)
18 }
19
20 #[allow(clippy::too_many_lines)]
22 pub fn parse(content: &str, path: &Path) -> TspResult<TspInstance> {
23 let mut name = String::new();
24 let mut comment = None;
25 let mut dimension = 0;
26 let mut edge_weight_type = EdgeWeightType::Euc2d;
27 let mut coords: Vec<(f64, f64)> = Vec::new();
28 let mut in_node_coord_section = false;
29 let mut in_edge_weight_section = false;
30 let mut edge_weights: Vec<f64> = Vec::new();
31 let mut best_known: Option<f64> = None;
32
33 for (line_num, line) in content.lines().enumerate() {
34 let line = line.trim();
35
36 if line.is_empty() || line == "EOF" {
37 continue;
38 }
39
40 if line == "NODE_COORD_SECTION" {
42 in_node_coord_section = true;
43 in_edge_weight_section = false;
44 continue;
45 }
46 if line == "EDGE_WEIGHT_SECTION" {
47 in_edge_weight_section = true;
48 in_node_coord_section = false;
49 continue;
50 }
51 if line == "DISPLAY_DATA_SECTION" {
52 in_node_coord_section = false;
53 in_edge_weight_section = false;
54 continue;
55 }
56
57 if in_node_coord_section {
59 let parts: Vec<&str> = line.split_whitespace().collect();
60 if parts.len() >= 3 {
61 let x: f64 = parts[1].parse().map_err(|_| TspError::ParseError {
62 file: path.to_path_buf(),
63 line: Some(line_num + 1),
64 cause: format!("Invalid x coordinate: {}", parts[1]),
65 })?;
66 let y: f64 = parts[2].parse().map_err(|_| TspError::ParseError {
67 file: path.to_path_buf(),
68 line: Some(line_num + 1),
69 cause: format!("Invalid y coordinate: {}", parts[2]),
70 })?;
71 coords.push((x, y));
72 }
73 continue;
74 }
75
76 if in_edge_weight_section {
78 for part in line.split_whitespace() {
79 let weight: f64 = part.parse().map_err(|_| TspError::ParseError {
80 file: path.to_path_buf(),
81 line: Some(line_num + 1),
82 cause: format!("Invalid edge weight: {part}"),
83 })?;
84 edge_weights.push(weight);
85 }
86 continue;
87 }
88
89 if let Some((key, value)) = line.split_once(':') {
91 let key = key.trim().to_uppercase();
92 let value = value.trim();
93
94 match key.as_str() {
95 "NAME" => name = value.to_string(),
96 "COMMENT" => {
97 comment = Some(value.to_string());
98 if let Some(opt) = Self::extract_optimal_from_comment(value) {
101 best_known = Some(opt);
102 }
103 }
104 "BEST_KNOWN" | "OPTIMAL" => {
105 if let Ok(opt) = value.parse::<f64>() {
107 best_known = Some(opt);
108 }
109 }
110 "DIMENSION" => {
111 dimension = value.parse().map_err(|_| TspError::ParseError {
112 file: path.to_path_buf(),
113 line: Some(line_num + 1),
114 cause: format!("Invalid dimension: {value}"),
115 })?;
116 }
117 "EDGE_WEIGHT_TYPE" => {
118 edge_weight_type = Self::parse_edge_weight_type(value, path, line_num)?;
119 }
120 _ => {}
122 }
123 }
124 }
125
126 if dimension == 0 {
128 return Err(TspError::ParseError {
129 file: path.to_path_buf(),
130 line: None,
131 cause: "Missing DIMENSION field".into(),
132 });
133 }
134
135 let distances = if !edge_weights.is_empty() {
137 Self::build_matrix_from_weights(&edge_weights, dimension, path)?
138 } else if !coords.is_empty() {
139 Self::compute_distance_matrix(&coords, edge_weight_type)
140 } else {
141 return Err(TspError::ParseError {
142 file: path.to_path_buf(),
143 line: None,
144 cause: "No coordinates or edge weights found".into(),
145 });
146 };
147
148 if name.is_empty() {
149 name = path
150 .file_stem()
151 .and_then(|s| s.to_str())
152 .unwrap_or("unnamed")
153 .to_string();
154 }
155
156 Ok(TspInstance {
157 name,
158 dimension,
159 comment,
160 edge_weight_type,
161 coords: if coords.is_empty() {
162 None
163 } else {
164 Some(coords)
165 },
166 distances,
167 best_known,
168 })
169 }
170
171 fn parse_edge_weight_type(
172 value: &str,
173 path: &Path,
174 line_num: usize,
175 ) -> TspResult<EdgeWeightType> {
176 match value.to_uppercase().as_str() {
177 "EUC_2D" => Ok(EdgeWeightType::Euc2d),
178 "EUC_3D" => Ok(EdgeWeightType::Euc3d),
179 "CEIL_2D" => Ok(EdgeWeightType::Ceil2d),
180 "GEO" => Ok(EdgeWeightType::Geo),
181 "ATT" => Ok(EdgeWeightType::Att),
182 "EXPLICIT" => Ok(EdgeWeightType::Explicit),
183 _ => Err(TspError::ParseError {
184 file: path.to_path_buf(),
185 line: Some(line_num + 1),
186 cause: format!("Unsupported edge weight type: {value}"),
187 }),
188 }
189 }
190
191 fn compute_distance_matrix(coords: &[(f64, f64)], edge_type: EdgeWeightType) -> Vec<Vec<f64>> {
192 let n = coords.len();
193 let mut matrix = vec![vec![0.0; n]; n];
194
195 for i in 0..n {
196 for j in i + 1..n {
197 let dist = match edge_type {
198 EdgeWeightType::Euc2d => {
199 let dx = coords[i].0 - coords[j].0;
200 let dy = coords[i].1 - coords[j].1;
201 (dx * dx + dy * dy).sqrt()
202 }
203 EdgeWeightType::Ceil2d => {
204 let dx = coords[i].0 - coords[j].0;
205 let dy = coords[i].1 - coords[j].1;
206 (dx * dx + dy * dy).sqrt().ceil()
207 }
208 EdgeWeightType::Att => {
209 let dx = coords[i].0 - coords[j].0;
213 let dy = coords[i].1 - coords[j].1;
214 let r = ((dx * dx + dy * dy) / 10.0).sqrt();
215 let t = r.round();
216 if t < r {
217 t + 1.0
218 } else {
219 t
220 }
221 }
222 EdgeWeightType::Geo => {
223 Self::geo_distance(coords[i], coords[j])
225 }
226 _ => {
227 let dx = coords[i].0 - coords[j].0;
229 let dy = coords[i].1 - coords[j].1;
230 (dx * dx + dy * dy).sqrt()
231 }
232 };
233 matrix[i][j] = dist;
234 matrix[j][i] = dist;
235 }
236 }
237
238 matrix
239 }
240
241 fn geo_distance(c1: (f64, f64), c2: (f64, f64)) -> f64 {
242 const PI: f64 = std::f64::consts::PI;
243 const RRR: f64 = 6378.388; let deg_to_rad = |deg: f64| -> f64 {
246 let deg_int = deg.trunc();
247 let min = deg - deg_int;
248 PI * (deg_int + 5.0 * min / 3.0) / 180.0
249 };
250
251 let lat1 = deg_to_rad(c1.0);
252 let lon1 = deg_to_rad(c1.1);
253 let lat2 = deg_to_rad(c2.0);
254 let lon2 = deg_to_rad(c2.1);
255
256 let q1 = (lon1 - lon2).cos();
257 let q2 = (lat1 - lat2).cos();
258 let q3 = (lat1 + lat2).cos();
259
260 let dij = RRR * (0.5 * ((1.0 + q1) * q2 - (1.0 - q1) * q3)).acos() + 1.0;
261 dij.floor()
262 }
263
264 fn extract_optimal_from_comment(comment: &str) -> Option<f64> {
273 let comment_lower = comment.to_lowercase();
274
275 let patterns = [
277 "optimal tour:",
278 "optimal:",
279 "best known:",
280 "optimal solution:",
281 "best:",
282 "length =",
283 "length:",
284 "tour length:",
285 ];
286
287 for pattern in patterns {
288 if let Some(pos) = comment_lower.find(pattern) {
289 let after_pattern = &comment[pos + pattern.len()..];
290 let num_str: String = after_pattern
292 .chars()
293 .skip_while(|c| c.is_whitespace())
294 .take_while(|c| c.is_ascii_digit() || *c == '.' || *c == ',')
295 .filter(|c| *c != ',') .collect();
297
298 if let Ok(val) = num_str.parse::<f64>() {
299 return Some(val);
300 }
301 }
302 }
303
304 if let Some(start) = comment.rfind('(') {
306 if let Some(end) = comment.rfind(')') {
307 if start < end {
308 let num_str: String = comment[start + 1..end]
309 .chars()
310 .filter(|c| c.is_ascii_digit() || *c == '.')
311 .collect();
312 if let Ok(val) = num_str.parse::<f64>() {
313 return Some(val);
314 }
315 }
316 }
317 }
318
319 None
320 }
321
322 fn build_matrix_from_weights(
323 weights: &[f64],
324 dimension: usize,
325 path: &Path,
326 ) -> TspResult<Vec<Vec<f64>>> {
327 let expected = dimension * (dimension - 1) / 2;
328 if weights.len() < expected {
329 return Err(TspError::ParseError {
330 file: path.to_path_buf(),
331 line: None,
332 cause: format!(
333 "Not enough edge weights: got {}, expected at least {} for dimension {}",
334 weights.len(),
335 expected,
336 dimension
337 ),
338 });
339 }
340
341 let mut matrix = vec![vec![0.0; dimension]; dimension];
342 let mut weight_iter = weights.iter();
343
344 #[allow(clippy::needless_range_loop)]
347 for i in 1..dimension {
348 for j in 0..i {
349 if let Some(&weight) = weight_iter.next() {
350 matrix[i][j] = weight;
351 matrix[j][i] = weight;
352 }
353 }
354 }
355
356 Ok(matrix)
357 }
358}
359
360#[cfg(test)]
361mod tests {
362 use super::*;
363 use std::path::PathBuf;
364
365 fn test_path() -> PathBuf {
366 PathBuf::from("test.tsp")
367 }
368
369 #[test]
370 fn test_parse_simple_tsplib() {
371 let content = r#"
372NAME: test
373TYPE: TSP
374COMMENT: A simple test
375DIMENSION: 3
376EDGE_WEIGHT_TYPE: EUC_2D
377NODE_COORD_SECTION
3781 0.0 0.0
3792 3.0 0.0
3803 3.0 4.0
381EOF
382"#;
383
384 let instance = TsplibParser::parse(content, &test_path()).expect("should parse");
385
386 assert_eq!(instance.name, "test");
387 assert_eq!(instance.dimension, 3);
388 assert_eq!(instance.comment, Some("A simple test".into()));
389 assert!(instance.coords.is_some());
390 assert_eq!(instance.coords.as_ref().map(|c| c.len()), Some(3));
391 }
392
393 #[test]
394 fn test_parse_computes_distances() {
395 let content = r#"
396NAME: triangle
397DIMENSION: 3
398EDGE_WEIGHT_TYPE: EUC_2D
399NODE_COORD_SECTION
4001 0.0 0.0
4012 3.0 0.0
4023 3.0 4.0
403EOF
404"#;
405
406 let instance = TsplibParser::parse(content, &test_path()).expect("should parse");
407
408 assert!((instance.distance(0, 1) - 3.0).abs() < 1e-10);
410 assert!((instance.distance(1, 2) - 4.0).abs() < 1e-10);
411 assert!((instance.distance(0, 2) - 5.0).abs() < 1e-10);
412 }
413
414 #[test]
415 fn test_parse_ceil_2d() {
416 let content = r#"
417NAME: ceil_test
418DIMENSION: 2
419EDGE_WEIGHT_TYPE: CEIL_2D
420NODE_COORD_SECTION
4211 0.0 0.0
4222 1.5 0.0
423EOF
424"#;
425
426 let instance = TsplibParser::parse(content, &test_path()).expect("should parse");
427
428 assert!((instance.distance(0, 1) - 2.0).abs() < 1e-10);
430 }
431
432 #[test]
433 fn test_parse_missing_dimension() {
434 let content = r#"
435NAME: test
436NODE_COORD_SECTION
4371 0.0 0.0
438EOF
439"#;
440
441 let result = TsplibParser::parse(content, &test_path());
442 assert!(result.is_err());
443 let err = result.unwrap_err().to_string();
444 assert!(err.contains("DIMENSION"));
445 }
446
447 #[test]
448 fn test_parse_no_coords_or_weights() {
449 let content = r#"
450NAME: test
451DIMENSION: 3
452EDGE_WEIGHT_TYPE: EUC_2D
453EOF
454"#;
455
456 let result = TsplibParser::parse(content, &test_path());
457 assert!(result.is_err());
458 }
459
460 #[test]
461 fn test_parse_invalid_coordinate() {
462 let content = r#"
463NAME: test
464DIMENSION: 2
465EDGE_WEIGHT_TYPE: EUC_2D
466NODE_COORD_SECTION
4671 0.0 0.0
4682 abc 0.0
469EOF
470"#;
471
472 let result = TsplibParser::parse(content, &test_path());
473 assert!(result.is_err());
474 let err = result.unwrap_err().to_string();
475 assert!(err.contains("Invalid x coordinate"));
476 }
477
478 #[test]
479 fn test_parse_explicit_weights() {
480 let content = r#"
481NAME: explicit_test
482DIMENSION: 3
483EDGE_WEIGHT_TYPE: EXPLICIT
484EDGE_WEIGHT_SECTION
48510
48620 30
487EOF
488"#;
489
490 let instance = TsplibParser::parse(content, &test_path()).expect("should parse");
491
492 assert!((instance.distance(1, 0) - 10.0).abs() < 1e-10);
494 assert!((instance.distance(2, 0) - 20.0).abs() < 1e-10);
495 assert!((instance.distance(2, 1) - 30.0).abs() < 1e-10);
496 }
497
498 #[test]
499 fn test_parse_unsupported_edge_type() {
500 let content = r#"
501NAME: test
502DIMENSION: 3
503EDGE_WEIGHT_TYPE: UNKNOWN_TYPE
504NODE_COORD_SECTION
5051 0.0 0.0
506EOF
507"#;
508
509 let result = TsplibParser::parse(content, &test_path());
510 assert!(result.is_err());
511 let err = result.unwrap_err().to_string();
512 assert!(err.contains("Unsupported edge weight type"));
513 }
514
515 #[test]
516 fn test_att_distance() {
517 let content = r#"
518NAME: att_test
519DIMENSION: 2
520EDGE_WEIGHT_TYPE: ATT
521NODE_COORD_SECTION
5221 0.0 0.0
5232 100.0 0.0
524EOF
525"#;
526
527 let instance = TsplibParser::parse(content, &test_path()).expect("should parse");
528
529 let dist = instance.distance(0, 1);
531 assert!(dist > 0.0);
532 }
533
534 #[test]
535 fn test_att_distance_matches_tsplib() {
536 let content = r#"
540NAME: att48_subset
541DIMENSION: 2
542EDGE_WEIGHT_TYPE: ATT
543NODE_COORD_SECTION
5441 6734 1453
5452 2233 10
546EOF
547"#;
548
549 let instance = TsplibParser::parse(content, &test_path()).expect("should parse");
550
551 let dist = instance.distance(0, 1);
557 assert!(
558 (dist - 1495.0).abs() < 1.0,
559 "ATT distance should be ~1495 (TSPLIB verified), got {}",
560 dist
561 );
562 }
563
564 #[test]
565 fn test_name_defaults_to_filename() {
566 let content = r#"
567DIMENSION: 2
568EDGE_WEIGHT_TYPE: EUC_2D
569NODE_COORD_SECTION
5701 0.0 0.0
5712 1.0 0.0
572EOF
573"#;
574
575 let instance =
576 TsplibParser::parse(content, &PathBuf::from("my_instance.tsp")).expect("should parse");
577 assert_eq!(instance.name, "my_instance");
578 }
579
580 #[test]
581 fn test_extract_optimal_from_comment_optimal_tour() {
582 let opt = TsplibParser::extract_optimal_from_comment("Optimal tour: 7542");
583 assert_eq!(opt, Some(7542.0));
584 }
585
586 #[test]
587 fn test_extract_optimal_from_comment_best_known() {
588 let opt = TsplibParser::extract_optimal_from_comment("Best known: 426");
589 assert_eq!(opt, Some(426.0));
590 }
591
592 #[test]
593 fn test_extract_optimal_from_comment_parentheses() {
594 let opt = TsplibParser::extract_optimal_from_comment("52 locations in Berlin (7542)");
595 assert_eq!(opt, Some(7542.0));
596 }
597
598 #[test]
599 fn test_extract_optimal_from_comment_with_thousands() {
600 let opt = TsplibParser::extract_optimal_from_comment("Optimal: 10,628");
601 assert_eq!(opt, Some(10628.0));
602 }
603
604 #[test]
605 fn test_extract_optimal_from_comment_no_match() {
606 let opt = TsplibParser::extract_optimal_from_comment("Just a plain comment");
607 assert_eq!(opt, None);
608 }
609
610 #[test]
611 fn test_parse_with_optimal_in_comment() {
612 let content = r#"
613NAME: test_optimal
614TYPE: TSP
615COMMENT: Optimal tour: 100
616DIMENSION: 3
617EDGE_WEIGHT_TYPE: EUC_2D
618NODE_COORD_SECTION
6191 0.0 0.0
6202 3.0 0.0
6213 3.0 4.0
622EOF
623"#;
624
625 let instance = TsplibParser::parse(content, &test_path()).expect("should parse");
626 assert_eq!(instance.best_known, Some(100.0));
627 }
628
629 #[test]
630 fn test_parse_with_explicit_best_known_field() {
631 let content = r#"
632NAME: test_explicit
633TYPE: TSP
634BEST_KNOWN: 7542
635DIMENSION: 3
636EDGE_WEIGHT_TYPE: EUC_2D
637NODE_COORD_SECTION
6381 0.0 0.0
6392 3.0 0.0
6403 3.0 4.0
641EOF
642"#;
643
644 let instance = TsplibParser::parse(content, &test_path()).expect("should parse");
645 assert_eq!(instance.best_known, Some(7542.0));
646 }
647}