Skip to main content

aprender_tsp/instance/
tsplib.rs

1//! TSPLIB format parser.
2//!
3//! Reference: Reinelt (1991) "TSPLIB—A Traveling Salesman Problem Library"
4
5use crate::error::{TspError, TspResult};
6use crate::instance::{EdgeWeightType, TspInstance};
7use std::path::Path;
8
9/// Intermediate state during TSPLIB parsing
10#[derive(Default)]
11struct TsplibParseState {
12    name: String,
13    comment: Option<String>,
14    dimension: usize,
15    edge_weight_type: EdgeWeightType,
16    coords: Vec<(f64, f64)>,
17    edge_weights: Vec<f64>,
18    best_known: Option<f64>,
19}
20
21impl TsplibParseState {
22    fn into_instance(self, path: &Path) -> Result<TspInstance, TspError> {
23        if self.dimension == 0 {
24            return Err(TspError::ParseError {
25                file: path.to_path_buf(),
26                line: None,
27                cause: "Missing DIMENSION field".into(),
28            });
29        }
30
31        let distances = if !self.edge_weights.is_empty() {
32            TsplibParser::build_matrix_from_weights(&self.edge_weights, self.dimension, path)?
33        } else if !self.coords.is_empty() {
34            TsplibParser::compute_distance_matrix(&self.coords, self.edge_weight_type)
35        } else {
36            return Err(TspError::ParseError {
37                file: path.to_path_buf(),
38                line: None,
39                cause: "No coordinates or edge weights found".into(),
40            });
41        };
42
43        let name = if self.name.is_empty() {
44            path.file_stem()
45                .and_then(|s| s.to_str())
46                .unwrap_or("unnamed")
47                .to_string()
48        } else {
49            self.name
50        };
51
52        Ok(TspInstance {
53            name,
54            dimension: self.dimension,
55            comment: self.comment,
56            edge_weight_type: self.edge_weight_type,
57            coords: if self.coords.is_empty() {
58                None
59            } else {
60                Some(self.coords)
61            },
62            distances,
63            best_known: self.best_known,
64        })
65    }
66}
67
68/// Parser for TSPLIB format files
69#[derive(Debug)]
70pub struct TsplibParser;
71
72impl TsplibParser {
73    /// Parse a TSPLIB file
74    pub fn parse_file(path: &Path) -> TspResult<TspInstance> {
75        let content = std::fs::read_to_string(path)?;
76        Self::parse(&content, path)
77    }
78
79    /// Parse a single node coordinate line ("id x y")
80    fn parse_coord_line(line: &str, path: &Path, line_num: usize) -> TspResult<Option<(f64, f64)>> {
81        let parts: Vec<&str> = line.split_whitespace().collect();
82        if parts.len() >= 3 {
83            let x: f64 = parts[1].parse().map_err(|_| TspError::ParseError {
84                file: path.to_path_buf(),
85                line: Some(line_num + 1),
86                cause: format!("Invalid x coordinate: {}", parts[1]),
87            })?;
88            let y: f64 = parts[2].parse().map_err(|_| TspError::ParseError {
89                file: path.to_path_buf(),
90                line: Some(line_num + 1),
91                cause: format!("Invalid y coordinate: {}", parts[2]),
92            })?;
93            Ok(Some((x, y)))
94        } else {
95            Ok(None)
96        }
97    }
98
99    /// Parse edge weight values from a line
100    fn parse_weight_line(
101        line: &str,
102        path: &Path,
103        line_num: usize,
104        weights: &mut Vec<f64>,
105    ) -> TspResult<()> {
106        for part in line.split_whitespace() {
107            let weight: f64 = part.parse().map_err(|_| TspError::ParseError {
108                file: path.to_path_buf(),
109                line: Some(line_num + 1),
110                cause: format!("Invalid edge weight: {part}"),
111            })?;
112            weights.push(weight);
113        }
114        Ok(())
115    }
116
117    /// Parse a header key-value field
118    fn parse_header_field(
119        key: &str,
120        value: &str,
121        path: &Path,
122        line_num: usize,
123        state: &mut TsplibParseState,
124    ) -> TspResult<()> {
125        match key {
126            "NAME" => state.name = value.to_string(),
127            "COMMENT" => {
128                state.comment = Some(value.to_string());
129                if let Some(opt) = Self::extract_optimal_from_comment(value) {
130                    state.best_known = Some(opt);
131                }
132            }
133            "BEST_KNOWN" | "OPTIMAL" => {
134                if let Ok(opt) = value.parse::<f64>() {
135                    state.best_known = Some(opt);
136                }
137            }
138            "DIMENSION" => {
139                state.dimension = value.parse().map_err(|_| TspError::ParseError {
140                    file: path.to_path_buf(),
141                    line: Some(line_num + 1),
142                    cause: format!("Invalid dimension: {value}"),
143                })?;
144            }
145            "EDGE_WEIGHT_TYPE" => {
146                state.edge_weight_type = Self::parse_edge_weight_type(value, path, line_num)?;
147            }
148            _ => {}
149        }
150        Ok(())
151    }
152
153    /// Parse TSPLIB content
154    pub fn parse(content: &str, path: &Path) -> TspResult<TspInstance> {
155        let mut state = TsplibParseState::default();
156        let mut in_node_coord = false;
157        let mut in_edge_weight = false;
158
159        for (line_num, line) in content.lines().enumerate() {
160            let line = line.trim();
161            if line.is_empty() || line == "EOF" {
162                continue;
163            }
164
165            // Section headers
166            match line {
167                "NODE_COORD_SECTION" => {
168                    in_node_coord = true;
169                    in_edge_weight = false;
170                    continue;
171                }
172                "EDGE_WEIGHT_SECTION" => {
173                    in_edge_weight = true;
174                    in_node_coord = false;
175                    continue;
176                }
177                "DISPLAY_DATA_SECTION" => {
178                    in_node_coord = false;
179                    in_edge_weight = false;
180                    continue;
181                }
182                _ => {}
183            }
184
185            if in_node_coord {
186                if let Some(coord) = Self::parse_coord_line(line, path, line_num)? {
187                    state.coords.push(coord);
188                }
189            } else if in_edge_weight {
190                Self::parse_weight_line(line, path, line_num, &mut state.edge_weights)?;
191            } else if let Some((key, value)) = line.split_once(':') {
192                Self::parse_header_field(
193                    key.trim().to_uppercase().as_str(),
194                    value.trim(),
195                    path,
196                    line_num,
197                    &mut state,
198                )?;
199            }
200        }
201
202        state.into_instance(path)
203    }
204
205    fn parse_edge_weight_type(
206        value: &str,
207        path: &Path,
208        line_num: usize,
209    ) -> TspResult<EdgeWeightType> {
210        match value.to_uppercase().as_str() {
211            "EUC_2D" => Ok(EdgeWeightType::Euc2d),
212            "EUC_3D" => Ok(EdgeWeightType::Euc3d),
213            "CEIL_2D" => Ok(EdgeWeightType::Ceil2d),
214            "GEO" => Ok(EdgeWeightType::Geo),
215            "ATT" => Ok(EdgeWeightType::Att),
216            "EXPLICIT" => Ok(EdgeWeightType::Explicit),
217            _ => Err(TspError::ParseError {
218                file: path.to_path_buf(),
219                line: Some(line_num + 1),
220                cause: format!("Unsupported edge weight type: {value}"),
221            }),
222        }
223    }
224
225    fn compute_distance_matrix(coords: &[(f64, f64)], edge_type: EdgeWeightType) -> Vec<Vec<f64>> {
226        let n = coords.len();
227        let mut matrix = vec![vec![0.0; n]; n];
228
229        for i in 0..n {
230            for j in i + 1..n {
231                let dist = match edge_type {
232                    EdgeWeightType::Euc2d => {
233                        let dx = coords[i].0 - coords[j].0;
234                        let dy = coords[i].1 - coords[j].1;
235                        (dx * dx + dy * dy).sqrt()
236                    }
237                    EdgeWeightType::Ceil2d => {
238                        let dx = coords[i].0 - coords[j].0;
239                        let dy = coords[i].1 - coords[j].1;
240                        (dx * dx + dy * dy).sqrt().ceil()
241                    }
242                    EdgeWeightType::Att => {
243                        // ATT distance (pseudo-Euclidean)
244                        // TSPLIB formula: rij = sqrt((xd*xd + yd*yd) / 10.0)
245                        // Note: division by 10 is INSIDE sqrt, not after
246                        let dx = coords[i].0 - coords[j].0;
247                        let dy = coords[i].1 - coords[j].1;
248                        let r = ((dx * dx + dy * dy) / 10.0).sqrt();
249                        let t = r.round();
250                        if t < r {
251                            t + 1.0
252                        } else {
253                            t
254                        }
255                    }
256                    EdgeWeightType::Geo => {
257                        // Geographic distance
258                        Self::geo_distance(coords[i], coords[j])
259                    }
260                    _ => {
261                        // Default to Euclidean
262                        let dx = coords[i].0 - coords[j].0;
263                        let dy = coords[i].1 - coords[j].1;
264                        (dx * dx + dy * dy).sqrt()
265                    }
266                };
267                matrix[i][j] = dist;
268                matrix[j][i] = dist;
269            }
270        }
271
272        matrix
273    }
274
275    fn geo_distance(c1: (f64, f64), c2: (f64, f64)) -> f64 {
276        const PI: f64 = std::f64::consts::PI;
277        const RRR: f64 = 6378.388; // Earth radius in km
278
279        let deg_to_rad = |deg: f64| -> f64 {
280            let deg_int = deg.trunc();
281            let min = deg - deg_int;
282            PI * (deg_int + 5.0 * min / 3.0) / 180.0
283        };
284
285        let lat1 = deg_to_rad(c1.0);
286        let lon1 = deg_to_rad(c1.1);
287        let lat2 = deg_to_rad(c2.0);
288        let lon2 = deg_to_rad(c2.1);
289
290        let q1 = (lon1 - lon2).cos();
291        let q2 = (lat1 - lat2).cos();
292        let q3 = (lat1 + lat2).cos();
293
294        let dij = RRR * (0.5 * ((1.0 + q1) * q2 - (1.0 - q1) * q3)).acos() + 1.0;
295        dij.floor()
296    }
297
298    /// Extract optimal tour length from COMMENT field.
299    ///
300    /// Recognizes patterns like:
301    /// - "Optimal tour: 7542"
302    /// - "Optimal: 7542"
303    /// - "Best known: 426"
304    /// - "optimal solution: 10628"
305    /// - "Length = 7542"
306    fn extract_optimal_from_comment(comment: &str) -> Option<f64> {
307        let comment_lower = comment.to_lowercase();
308
309        // List of patterns to try
310        let patterns = [
311            "optimal tour:",
312            "optimal:",
313            "best known:",
314            "optimal solution:",
315            "best:",
316            "length =",
317            "length:",
318            "tour length:",
319        ];
320
321        for pattern in patterns {
322            if let Some(pos) = comment_lower.find(pattern) {
323                let after_pattern = &comment[pos + pattern.len()..];
324                // Extract the number following the pattern
325                let num_str: String = after_pattern
326                    .chars()
327                    .skip_while(|c| c.is_whitespace())
328                    .take_while(|c| c.is_ascii_digit() || *c == '.' || *c == ',')
329                    .filter(|c| *c != ',') // Remove thousand separators
330                    .collect();
331
332                if let Ok(val) = num_str.parse::<f64>() {
333                    return Some(val);
334                }
335            }
336        }
337
338        // Also try to find a standalone number at the end like "(7542)"
339        if let Some(start) = comment.rfind('(') {
340            if let Some(end) = comment.rfind(')') {
341                if start < end {
342                    let num_str: String = comment[start + 1..end]
343                        .chars()
344                        .filter(|c| c.is_ascii_digit() || *c == '.')
345                        .collect();
346                    if let Ok(val) = num_str.parse::<f64>() {
347                        return Some(val);
348                    }
349                }
350            }
351        }
352
353        None
354    }
355
356    fn build_matrix_from_weights(
357        weights: &[f64],
358        dimension: usize,
359        path: &Path,
360    ) -> TspResult<Vec<Vec<f64>>> {
361        let expected = dimension * (dimension - 1) / 2;
362        if weights.len() < expected {
363            return Err(TspError::ParseError {
364                file: path.to_path_buf(),
365                line: None,
366                cause: format!(
367                    "Not enough edge weights: got {}, expected at least {} for dimension {}",
368                    weights.len(),
369                    expected,
370                    dimension
371                ),
372            });
373        }
374
375        let mut matrix = vec![vec![0.0; dimension]; dimension];
376        let mut weight_iter = weights.iter();
377
378        // Assume lower triangular format
379        // Use range loops here as we need indices for symmetric matrix assignment
380        #[allow(clippy::needless_range_loop)]
381        for i in 1..dimension {
382            for j in 0..i {
383                if let Some(&weight) = weight_iter.next() {
384                    matrix[i][j] = weight;
385                    matrix[j][i] = weight;
386                }
387            }
388        }
389
390        Ok(matrix)
391    }
392}
393
394#[cfg(test)]
395#[path = "tsplib_tests.rs"]
396mod tests;