sat_solver/sat/
dimacs.rs

1#![warn(clippy::all, clippy::pedantic, clippy::nursery, clippy::cargo)]
2//! A parser for the DIMACS CNF (Conjunctive Normal Form) file format.
3//!
4//! The DIMACS CNF format is a standard text-based format for representing
5//! boolean satisfiability problems. This module provides functions to parse
6//! such files into an in-memory `Cnf` (Conjunctive Normal Form) structure.
7//!
8//! The format typically includes:
9//! - Comment lines starting with 'c'.
10//! - A problem line starting with 'p cnf <`num_variables`> <`num_clauses`>'.
11//! - Clause lines, where each line represents a clause. Literals are represented
12//!   by integers (positive for the variable, negative for its negation).
13//!   Each clause line is terminated by a '0'.
14//! - An optional '%' line to indicate end-of-data (used in competition data).
15//!
16//! This parser focuses on extracting the clause data.
17
18use crate::sat::clause_storage::LiteralStorage;
19use crate::sat::cnf::Cnf;
20use crate::sat::literal::Literal;
21use itertools::Itertools; // For `collect_vec`
22use std::io::{self, BufRead};
23use std::path::PathBuf;
24
25/// Parses a DIMACS CNF file from a `BufRead` source.
26///
27/// This function is a convenience wrapper around `parse_dimacs`.
28/// It reads the input from a string slice, which is useful for testing or
29/// when the DIMACS data is available as a string.
30///
31/// # Type Parameters
32/// * `L`: The `Literal` type to be used in the resulting `Cnf`.
33/// * `S`: The `LiteralStorage` type for clauses in the resulting `Cnf`.
34/// # Arguments
35/// * `dimacs_text`: A string slice containing the DIMACS CNF data.
36/// # Errors
37/// - If parsing fails due to malformed input (e.g. non-integer literals, missing terminators).
38pub fn parse_dimacs_text<L: Literal, S: LiteralStorage<L>>(
39    dimacs_text: &str,
40) -> Result<Cnf<L, S>, String> {
41    let reader = io::Cursor::new(dimacs_text);
42    parse_dimacs(reader)
43}
44
45/// Parses DIMACS formatted data from a `BufRead` source into a `Cnf` structure.
46///
47/// It reads the input line by line:
48/// - Lines starting with 'c' (comments) or 'p' (problem line) are currently skipped without parsing their content.
49/// - Lines starting with '%' or empty lines after the main content are treated as end-of-data markers.
50/// - Other lines are treated as clause definitions. Each number on such a line is parsed as an `i32` literal.
51///   The terminating '0' of a DIMACS clause line is filtered out.
52///
53/// # Type Parameters
54///
55/// * `R`: A type that implements `BufRead` (e.g. `io::BufReader<File>`).
56/// * `L`: The `Literal` type to be used in the resulting `Cnf`.
57/// * `S`: The `LiteralStorage` type for clauses in the resulting `Cnf`.
58///
59/// # Panics
60///
61/// - If reading a line from the `reader` fails (e.g. I/O error, invalid UTF-8).
62/// - If parsing a literal string (e.g. "1", "-2") into an `i32` fails. This implies
63///   a malformed DIMACS file if non-integer tokens appear where literals are expected.
64///
65/// # Returns
66///
67/// A `Cnf<L, S>` structure representing the parsed formula.
68/// Or `None` if no clauses were found (e.g. empty file or only comments).
69///
70/// # Errors
71/// Returns `()` if parsing fails due to malformed input (e.g. non-integer literals, missing terminators).
72pub fn parse_dimacs<R: BufRead, L: Literal, S: LiteralStorage<L>>(
73    reader: R,
74) -> Result<Cnf<L, S>, String> {
75    let mut lines = reader
76        .lines()
77        .map_while(Result::ok)
78        .filter(|line| !line.is_empty());
79
80    let mut clauses_dimacs: Vec<Vec<i32>> = Vec::new();
81
82    for line_str in &mut lines {
83        let mut parts = line_str.split_whitespace().peekable();
84
85        match parts.peek() {
86            Some(&"%") => break,
87            None | Some(&"c" | &"p") => {}
88            Some(_) => {
89                let clause_literals: Vec<i32> = parts
90                    .map(|s| {
91                        s.parse::<i32>()
92                            .unwrap_or_else(|e| panic!("Failed to parse literal '{s}' as i32: {e}"))
93                    })
94                    .filter(|&p| p != 0)
95                    .collect_vec();
96
97                if !clause_literals.is_empty() {
98                    clauses_dimacs.push(clause_literals);
99                }
100            }
101        }
102    }
103    Ok(Cnf::new(clauses_dimacs))
104}
105
106/// Parses a DIMACS CNF file specified by its path.
107///
108/// This is a convenience function that opens the file, wraps it in a `BufReader`,
109/// and then calls `parse_dimacs`.
110///
111/// # Type Parameters
112///
113/// * `T`: The `Literal` type for the `Cnf`.
114/// * `S`: The `LiteralStorage` type for the `Cnf`.
115///
116/// # Arguments
117///
118/// * `file_path`: A string slice representing the path to the DIMACS file.
119///
120/// # Errors
121///
122/// Returns `io::Result::Err` if the file cannot be opened or read (e.g. path does not exist,
123/// permissions error). Panics from `parse_dimacs` (e.g. malformed content) will propagate.
124///
125/// # Returns
126///
127/// `io::Result::Ok(Cnf<T, S>)` if parsing is successful.
128pub fn parse_file<T: Literal, S: LiteralStorage<T>>(file_path: &PathBuf) -> io::Result<Cnf<T, S>> {
129    let file = std::fs::File::open(file_path)?;
130    let reader = io::BufReader::new(file);
131    let cnf: Cnf<T, S> = parse_dimacs(reader).map_err(|s| {
132        io::Error::new(
133            io::ErrorKind::InvalidData,
134            format!("Failed to parse DIMACS file: {}, {s}", file_path.display()),
135        )
136    })?;
137    if cnf.clauses.is_empty() {
138        return Err(io::Error::new(
139            io::ErrorKind::InvalidData,
140            format!("No clauses found in DIMACS file: {}", file_path.display()),
141        ));
142    }
143    Ok(cnf)
144}
145
146/// Recursively finds all files in a directory and its subdirectories.
147///
148/// # Arguments
149///
150/// * `dir_path`: A string slice representing the path to the directory to scan.
151///
152/// # Errors
153///
154/// Returns `io::Result::Err` if there's an issue reading the directory or its entries
155/// (e.g. path does not exist, permissions error).
156///
157/// # Panics
158///
159/// - If `entry.path().to_str()` returns `None` (i.e. the path is not valid UTF-8).
160///
161/// # Returns
162///
163/// `io::Result::Ok(Vec<String>)` containing the full paths of all files found.
164#[allow(dead_code)]
165pub fn get_all_files(dir_path: &str) -> io::Result<Vec<String>> {
166    let mut files_found = Vec::new();
167
168    for entry_result in std::fs::read_dir(dir_path)? {
169        let entry = entry_result?;
170        let path = entry.path();
171
172        if path.is_file() {
173            let path_str = path
174                .to_str()
175                .unwrap_or_else(|| panic!("Path contains non-UTF8 characters: {}", path.display()))
176                .to_string();
177            files_found.push(path_str);
178        } else if path.is_dir() {
179            let sub_dir_path_str = path.to_str().unwrap_or_else(|| {
180                panic!(
181                    "Subdirectory path contains non-UTF8 characters: {}",
182                    path.display()
183                )
184            });
185            let mut sub_files = get_all_files(sub_dir_path_str)?;
186            files_found.append(&mut sub_files);
187        }
188    }
189
190    Ok(files_found)
191}
192
193#[cfg(test)]
194mod tests {
195    use super::*;
196    use crate::sat::literal::{DoubleLiteral, PackedLiteral};
197    use std::io::Cursor;
198
199    type TestCnf = Cnf<PackedLiteral, smallvec::SmallVec<[PackedLiteral; 8]>>;
200
201    #[test]
202    fn test_parse_simple_dimacs() {
203        let dimacs_content = "c This is a comment\n\
204                              p cnf 3 2\n\
205                              1 -2 0\n\
206                              2 3 0\n";
207        let reader = Cursor::new(dimacs_content);
208        let result = parse_dimacs(reader);
209        assert!(result.is_ok(), "Parsing should succeed");
210        let cnf: TestCnf = result.unwrap();
211
212        assert_eq!(cnf.clauses.len(), 2, "Should parse 2 clauses");
213        assert_eq!(cnf.num_vars, 3 + 1, "Number of variables mismatch");
214
215        assert_eq!(cnf.clauses[0].len(), 2);
216        let c1_lits: Vec<i32> = cnf.clauses[0]
217            .iter()
218            .map(Literal::to_i32)
219            .sorted()
220            .collect();
221        assert_eq!(c1_lits, vec![-2, 1]);
222
223        assert_eq!(cnf.clauses[1].len(), 2);
224        let c2_lits: Vec<i32> = cnf.clauses[1]
225            .iter()
226            .map(Literal::to_i32)
227            .sorted()
228            .collect();
229        assert_eq!(c2_lits, vec![2, 3]);
230    }
231
232    #[test]
233    fn test_parse_dimacs_with_empty_lines_and_end_marker() {
234        let dimacs_content = "p cnf 2 2\n\
235                              \n\
236                              1 0\n\
237                              \n\
238                              -2 0\n\
239                              %\n\
240                              c this should be ignored";
241        let reader = Cursor::new(dimacs_content);
242        let result = parse_dimacs(reader);
243        assert!(result.is_ok(), "Parsing should succeed");
244        let cnf: TestCnf = result.unwrap();
245
246        assert_eq!(cnf.clauses.len(), 2);
247        assert_eq!(cnf.num_vars, 2 + 1);
248        assert_eq!(cnf.clauses[0].iter().next().unwrap().to_i32(), 1);
249        assert_eq!(cnf.clauses[1].iter().next().unwrap().to_i32(), -2);
250    }
251
252    #[test]
253    fn test_parse_dimacs_empty_clause() {
254        let dimacs_content = "p cnf 1 1\n0\n";
255        let reader = Cursor::new(dimacs_content);
256        let result = parse_dimacs(reader);
257        assert!(result.is_ok(), "Parsing should succeed");
258        let cnf: TestCnf = result.unwrap();
259
260        assert_eq!(cnf.clauses.len(), 0, "Should parse 0 clauses");
261    }
262
263    #[test]
264    fn test_parse_dimacs_malformed_literal() {
265        let dimacs_content = "1 abc 0\n";
266        let reader = Cursor::new(dimacs_content);
267        let result: Result<Cnf<DoubleLiteral, Vec<DoubleLiteral>>, String> = parse_dimacs(reader);
268        assert!(
269            result.is_err(),
270            "Parsing should fail due to malformed literal"
271        );
272    }
273
274    #[test]
275    fn test_parse_dimacs_no_clauses() {
276        let dimacs_content = "p cnf 0 0\n";
277        let reader = Cursor::new(dimacs_content);
278        let result = parse_dimacs(reader);
279        assert!(
280            result.is_ok(),
281            "Parsing should succeed even with no clauses"
282        );
283        let cnf: TestCnf = result.unwrap();
284        assert!(cnf.clauses.is_empty());
285        assert_eq!(cnf.num_vars, 1);
286    }
287}