sat_solver/sat/dimacs.rs
1#![warn(clippy::all, clippy::pedantic, clippy::nursery, clippy::cargo)]
2//! A parser for the DIMACS CNF (Conjunctive Normal Form) file format.
3//!
4//! The DIMACS CNF format is a standard text-based format for representing
5//! boolean satisfiability problems. This module provides functions to parse
6//! such files into an in-memory `Cnf` (Conjunctive Normal Form) structure.
7//!
8//! The format typically includes:
9//! - Comment lines starting with 'c'.
10//! - A problem line starting with 'p cnf <`num_variables`> <`num_clauses`>'.
11//! - Clause lines, where each line represents a clause. Literals are represented
12//! by integers (positive for the variable, negative for its negation).
13//! Each clause line is terminated by a '0'.
14//! - An optional '%' line to indicate end-of-data (used in competition data).
15//!
16//! This parser focuses on extracting the clause data.
17
18use crate::sat::clause_storage::LiteralStorage;
19use crate::sat::cnf::Cnf;
20use crate::sat::literal::Literal;
21use itertools::Itertools; // For `collect_vec`
22use std::io::{self, BufRead};
23use std::path::PathBuf;
24
25/// Parses a DIMACS CNF file from a `BufRead` source.
26///
27/// This function is a convenience wrapper around `parse_dimacs`.
28/// It reads the input from a string slice, which is useful for testing or
29/// when the DIMACS data is available as a string.
30///
31/// # Type Parameters
32/// * `L`: The `Literal` type to be used in the resulting `Cnf`.
33/// * `S`: The `LiteralStorage` type for clauses in the resulting `Cnf`.
34/// # Arguments
35/// * `dimacs_text`: A string slice containing the DIMACS CNF data.
36/// # Errors
37/// - If parsing fails due to malformed input (e.g. non-integer literals, missing terminators).
38pub fn parse_dimacs_text<L: Literal, S: LiteralStorage<L>>(
39 dimacs_text: &str,
40) -> Result<Cnf<L, S>, String> {
41 let reader = io::Cursor::new(dimacs_text);
42 parse_dimacs(reader)
43}
44
45/// Parses DIMACS formatted data from a `BufRead` source into a `Cnf` structure.
46///
47/// It reads the input line by line:
48/// - Lines starting with 'c' (comments) or 'p' (problem line) are currently skipped without parsing their content.
49/// - Lines starting with '%' or empty lines after the main content are treated as end-of-data markers.
50/// - Other lines are treated as clause definitions. Each number on such a line is parsed as an `i32` literal.
51/// The terminating '0' of a DIMACS clause line is filtered out.
52///
53/// # Type Parameters
54///
55/// * `R`: A type that implements `BufRead` (e.g. `io::BufReader<File>`).
56/// * `L`: The `Literal` type to be used in the resulting `Cnf`.
57/// * `S`: The `LiteralStorage` type for clauses in the resulting `Cnf`.
58///
59/// # Panics
60///
61/// - If reading a line from the `reader` fails (e.g. I/O error, invalid UTF-8).
62/// - If parsing a literal string (e.g. "1", "-2") into an `i32` fails. This implies
63/// a malformed DIMACS file if non-integer tokens appear where literals are expected.
64///
65/// # Returns
66///
67/// A `Cnf<L, S>` structure representing the parsed formula.
68/// Or `None` if no clauses were found (e.g. empty file or only comments).
69///
70/// # Errors
71/// Returns `()` if parsing fails due to malformed input (e.g. non-integer literals, missing terminators).
72pub fn parse_dimacs<R: BufRead, L: Literal, S: LiteralStorage<L>>(
73 reader: R,
74) -> Result<Cnf<L, S>, String> {
75 let mut lines = reader
76 .lines()
77 .map_while(Result::ok)
78 .filter(|line| !line.is_empty());
79
80 let mut clauses_dimacs: Vec<Vec<i32>> = Vec::new();
81
82 for line_str in &mut lines {
83 let mut parts = line_str.split_whitespace().peekable();
84
85 match parts.peek() {
86 Some(&"%") => break,
87 None | Some(&"c" | &"p") => {}
88 Some(_) => {
89 let clause_literals: Vec<i32> = parts
90 .map(|s| {
91 s.parse::<i32>()
92 .unwrap_or_else(|e| panic!("Failed to parse literal '{s}' as i32: {e}"))
93 })
94 .filter(|&p| p != 0)
95 .collect_vec();
96
97 if !clause_literals.is_empty() {
98 clauses_dimacs.push(clause_literals);
99 }
100 }
101 }
102 }
103 Ok(Cnf::new(clauses_dimacs))
104}
105
106/// Parses a DIMACS CNF file specified by its path.
107///
108/// This is a convenience function that opens the file, wraps it in a `BufReader`,
109/// and then calls `parse_dimacs`.
110///
111/// # Type Parameters
112///
113/// * `T`: The `Literal` type for the `Cnf`.
114/// * `S`: The `LiteralStorage` type for the `Cnf`.
115///
116/// # Arguments
117///
118/// * `file_path`: A string slice representing the path to the DIMACS file.
119///
120/// # Errors
121///
122/// Returns `io::Result::Err` if the file cannot be opened or read (e.g. path does not exist,
123/// permissions error). Panics from `parse_dimacs` (e.g. malformed content) will propagate.
124///
125/// # Returns
126///
127/// `io::Result::Ok(Cnf<T, S>)` if parsing is successful.
128pub fn parse_file<T: Literal, S: LiteralStorage<T>>(file_path: &PathBuf) -> io::Result<Cnf<T, S>> {
129 let file = std::fs::File::open(file_path)?;
130 let reader = io::BufReader::new(file);
131 let cnf: Cnf<T, S> = parse_dimacs(reader).map_err(|s| {
132 io::Error::new(
133 io::ErrorKind::InvalidData,
134 format!("Failed to parse DIMACS file: {}, {s}", file_path.display()),
135 )
136 })?;
137 if cnf.clauses.is_empty() {
138 return Err(io::Error::new(
139 io::ErrorKind::InvalidData,
140 format!("No clauses found in DIMACS file: {}", file_path.display()),
141 ));
142 }
143 Ok(cnf)
144}
145
146/// Recursively finds all files in a directory and its subdirectories.
147///
148/// # Arguments
149///
150/// * `dir_path`: A string slice representing the path to the directory to scan.
151///
152/// # Errors
153///
154/// Returns `io::Result::Err` if there's an issue reading the directory or its entries
155/// (e.g. path does not exist, permissions error).
156///
157/// # Panics
158///
159/// - If `entry.path().to_str()` returns `None` (i.e. the path is not valid UTF-8).
160///
161/// # Returns
162///
163/// `io::Result::Ok(Vec<String>)` containing the full paths of all files found.
164#[allow(dead_code)]
165pub fn get_all_files(dir_path: &str) -> io::Result<Vec<String>> {
166 let mut files_found = Vec::new();
167
168 for entry_result in std::fs::read_dir(dir_path)? {
169 let entry = entry_result?;
170 let path = entry.path();
171
172 if path.is_file() {
173 let path_str = path
174 .to_str()
175 .unwrap_or_else(|| panic!("Path contains non-UTF8 characters: {}", path.display()))
176 .to_string();
177 files_found.push(path_str);
178 } else if path.is_dir() {
179 let sub_dir_path_str = path.to_str().unwrap_or_else(|| {
180 panic!(
181 "Subdirectory path contains non-UTF8 characters: {}",
182 path.display()
183 )
184 });
185 let mut sub_files = get_all_files(sub_dir_path_str)?;
186 files_found.append(&mut sub_files);
187 }
188 }
189
190 Ok(files_found)
191}
192
193#[cfg(test)]
194mod tests {
195 use super::*;
196 use crate::sat::literal::{DoubleLiteral, PackedLiteral};
197 use std::io::Cursor;
198
199 type TestCnf = Cnf<PackedLiteral, smallvec::SmallVec<[PackedLiteral; 8]>>;
200
201 #[test]
202 fn test_parse_simple_dimacs() {
203 let dimacs_content = "c This is a comment\n\
204 p cnf 3 2\n\
205 1 -2 0\n\
206 2 3 0\n";
207 let reader = Cursor::new(dimacs_content);
208 let result = parse_dimacs(reader);
209 assert!(result.is_ok(), "Parsing should succeed");
210 let cnf: TestCnf = result.unwrap();
211
212 assert_eq!(cnf.clauses.len(), 2, "Should parse 2 clauses");
213 assert_eq!(cnf.num_vars, 3 + 1, "Number of variables mismatch");
214
215 assert_eq!(cnf.clauses[0].len(), 2);
216 let c1_lits: Vec<i32> = cnf.clauses[0]
217 .iter()
218 .map(Literal::to_i32)
219 .sorted()
220 .collect();
221 assert_eq!(c1_lits, vec![-2, 1]);
222
223 assert_eq!(cnf.clauses[1].len(), 2);
224 let c2_lits: Vec<i32> = cnf.clauses[1]
225 .iter()
226 .map(Literal::to_i32)
227 .sorted()
228 .collect();
229 assert_eq!(c2_lits, vec![2, 3]);
230 }
231
232 #[test]
233 fn test_parse_dimacs_with_empty_lines_and_end_marker() {
234 let dimacs_content = "p cnf 2 2\n\
235 \n\
236 1 0\n\
237 \n\
238 -2 0\n\
239 %\n\
240 c this should be ignored";
241 let reader = Cursor::new(dimacs_content);
242 let result = parse_dimacs(reader);
243 assert!(result.is_ok(), "Parsing should succeed");
244 let cnf: TestCnf = result.unwrap();
245
246 assert_eq!(cnf.clauses.len(), 2);
247 assert_eq!(cnf.num_vars, 2 + 1);
248 assert_eq!(cnf.clauses[0].iter().next().unwrap().to_i32(), 1);
249 assert_eq!(cnf.clauses[1].iter().next().unwrap().to_i32(), -2);
250 }
251
252 #[test]
253 fn test_parse_dimacs_empty_clause() {
254 let dimacs_content = "p cnf 1 1\n0\n";
255 let reader = Cursor::new(dimacs_content);
256 let result = parse_dimacs(reader);
257 assert!(result.is_ok(), "Parsing should succeed");
258 let cnf: TestCnf = result.unwrap();
259
260 assert_eq!(cnf.clauses.len(), 0, "Should parse 0 clauses");
261 }
262
263 #[test]
264 fn test_parse_dimacs_malformed_literal() {
265 let dimacs_content = "1 abc 0\n";
266 let reader = Cursor::new(dimacs_content);
267 let result: Result<Cnf<DoubleLiteral, Vec<DoubleLiteral>>, String> = parse_dimacs(reader);
268 assert!(
269 result.is_err(),
270 "Parsing should fail due to malformed literal"
271 );
272 }
273
274 #[test]
275 fn test_parse_dimacs_no_clauses() {
276 let dimacs_content = "p cnf 0 0\n";
277 let reader = Cursor::new(dimacs_content);
278 let result = parse_dimacs(reader);
279 assert!(
280 result.is_ok(),
281 "Parsing should succeed even with no clauses"
282 );
283 let cnf: TestCnf = result.unwrap();
284 assert!(cnf.clauses.is_empty());
285 assert_eq!(cnf.num_vars, 1);
286 }
287}