sat_solver/sat/
preprocessing.rs

1#![warn(clippy::all, clippy::pedantic, clippy::nursery, clippy::cargo)]
2//! Defines traits and implementations for preprocessing SAT formulas.
3//!
4//! Preprocessing techniques aim to simplify a CNF formula before handing it
5//! to the main SAT solver. This can significantly reduce the search space and
6//! improve solver performance. Common techniques include:
7//! - Eliminating tautological clauses (e.g. `x V !x V y`).
8//! - Eliminating pure literals (literals that only appear with one polarity).
9//! - Eliminating subsumed clauses (e.g. if `(x V y)` exists, `(x V y V z)` is subsumed).
10//!
11//! This module provides:
12//! - The `Preprocessor` trait, defining a common interface for preprocessing steps.
13//! - `PreprocessorChain` for applying multiple preprocessors in sequence.
14//! - Implementations for several common preprocessing techniques.
15
16use crate::sat::clause::Clause;
17use crate::sat::clause_storage::LiteralStorage;
18use crate::sat::literal::Literal;
19use rustc_hash::FxHashSet;
20use std::fmt::Debug;
21use std::sync::Arc;
22
23/// Defines the interface for a CNF formula preprocessor.
24///
25/// A preprocessor takes a list of clauses (representing a CNF formula)
26/// and returns a new list of clauses, presumably simplified or transformed
27/// in a way that preserves satisfiability.
28///
29/// # Type Parameters
30///
31/// * `L`: The `Literal` type used in clauses.
32/// * `S`: The `LiteralStorage` type for clauses.
33pub trait Preprocessor<L: Literal, S: LiteralStorage<L>> {
34    /// Applies the preprocessing technique to the given CNF formula.
35    ///
36    /// # Arguments
37    ///
38    /// * `cnf`: A slice of `Clause<L, S>` representing the input formula.
39    ///
40    /// # Returns
41    ///
42    /// A `Vec<Clause<L, S>>` representing the preprocessed formula.
43    /// This new vector might contain fewer clauses, modified clauses, or new clauses.
44    fn preprocess(&self, cnf: &[Clause<L, S>]) -> Vec<Clause<L, S>>;
45}
46
47/// A chain of preprocessors, allowing multiple preprocessing techniques
48/// to be applied sequentially.
49///
50/// Preprocessors are stored as `Arc<dyn Preprocessor<L, S>>` to allow for
51/// different concrete preprocessor types in the same chain.
52///
53/// # Type Parameters
54///
55/// * `L`: The `Literal` type.
56/// * `S`: The `LiteralStorage` type.
57#[derive(Clone, Default)]
58pub struct PreprocessorChain<L: Literal, S: LiteralStorage<L>> {
59    preprocessors: Vec<Arc<dyn Preprocessor<L, S>>>,
60}
61
62/// Custom `Debug` implementation for `PreprocessorChain`.
63///
64/// As `dyn Preprocessor` is not `Debug`, we provide a simple placeholder debug output.
65impl<L: Literal, S: LiteralStorage<L>> Debug for PreprocessorChain<L, S> {
66    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
67        f.debug_struct("PreprocessorChain")
68            .field("num_preprocessors", &self.preprocessors.len())
69            .finish()
70    }
71}
72
73impl<L: Literal, S: LiteralStorage<L>> PreprocessorChain<L, S> {
74    /// Creates a new, empty `PreprocessorChain`.
75    #[must_use]
76    #[allow(dead_code)]
77    pub const fn new() -> Self {
78        Self {
79            preprocessors: Vec::new(),
80        }
81    }
82
83    /// Adds a preprocessor to the chain.
84    ///
85    /// The preprocessor is wrapped in an `Arc` to be stored in the chain.
86    /// This method consumes `self` and returns a new chain with the added preprocessor.
87    ///
88    /// # Arguments
89    ///
90    /// * `preprocessor`: An instance of a type `P` that implements `Preprocessor<L, S>`
91    ///   and is `'static` (required for `dyn Trait`).
92    ///
93    /// # Returns
94    ///
95    /// A new `PreprocessorChain` with the added preprocessor.
96    #[must_use]
97    #[allow(dead_code)]
98    pub fn add_preprocessor<P: Preprocessor<L, S> + 'static>(mut self, preprocessor: P) -> Self {
99        let arc_preprocessor = Arc::new(preprocessor);
100        self.preprocessors.push(arc_preprocessor);
101        self
102    }
103}
104
105impl<L: Literal, S: LiteralStorage<L>> Preprocessor<L, S> for PreprocessorChain<L, S> {
106    /// Applies each preprocessor in the chain sequentially.
107    ///
108    /// The output of one preprocessor becomes the input to the next.
109    /// The initial input `cnf` is cloned to start the process.
110    ///
111    /// # Arguments
112    ///
113    /// * `cnf`: A slice of `Clause<L, S>` representing the input formula.
114    ///
115    /// # Returns
116    ///
117    /// A `Vec<Clause<L, S>>` representing the formula after all preprocessors
118    /// in the chain have been applied.
119    fn preprocess(&self, cnf: &[Clause<L, S>]) -> Vec<Clause<L, S>> {
120        self.preprocessors
121            .iter()
122            .fold(Vec::from(cnf), |current_cnf, preprocessor_arc| {
123                preprocessor_arc.preprocess(&current_cnf)
124            })
125    }
126}
127
128/// Preprocessor for Pure Literal Elimination.
129///
130/// A pure literal is a literal that appears in the formula with only one polarity
131/// (e.g. `x` appears but `!x` does not, or vice versa).
132/// If a literal `l` is pure:
133/// - All clauses containing `l` can be satisfied by setting `l` to true. These clauses can be removed.
134/// - Literals that are pure do not need to be part of the simplified formula sent to the solver,
135///   as their assignment is determined.
136///
137/// This implementation removes clauses containing any pure literal.
138#[derive(Clone, Copy, Default, Debug, PartialEq, Eq)]
139pub struct PureLiteralElimination;
140
141impl PureLiteralElimination {
142    /// Finds all pure literals in the given CNF formula.
143    ///
144    /// A literal `l` is pure if its negation `!l` does not appear in any clause
145    /// while `l` itself does appear.
146    ///
147    /// The complexity is roughly proportional to the total number of literals in the CNF.
148    ///
149    /// # Arguments
150    ///
151    /// * `cnf`: A slice of `Clause<L, S>`.
152    ///
153    /// # Returns
154    ///
155    /// A `FxHashSet<L>` containing all pure literals found.
156    pub fn find_pures<L: Literal, S: LiteralStorage<L>>(cnf: &[Clause<L, S>]) -> FxHashSet<L> {
157        let mut pures = FxHashSet::default();
158        let mut impures = FxHashSet::default();
159
160        for clause in cnf {
161            for &lit in clause.iter() {
162                let var = lit.variable();
163                if impures.contains(&var) {
164                    continue;
165                }
166
167                if pures.contains(&lit.negated()) {
168                    pures.remove(&lit.negated());
169                    impures.insert(var);
170                    pures.remove(&lit);
171                } else if !impures.contains(&var) {
172                    pures.insert(lit);
173                }
174            }
175        }
176
177        pures
178    }
179}
180
181impl<L: Literal, S: LiteralStorage<L>> Preprocessor<L, S> for PureLiteralElimination {
182    /// Applies pure literal elimination.
183    ///
184    /// 1. Finds all pure literals in the `cnf`.
185    /// 2. Removes all clauses that contain any of these pure literals.
186    ///    (Assigning a pure literal `l` to true satisfies any clause containing `l`.)
187    ///
188    /// The complexity is dominated by `find_pures` and iterating through clauses again.
189    fn preprocess(&self, cnf: &[Clause<L, S>]) -> Vec<Clause<L, S>> {
190        let mut current_cnf = cnf.to_vec();
191
192        let pures = Self::find_pures(current_cnf.as_slice());
193
194        if pures.is_empty() {
195            return current_cnf;
196        }
197
198        current_cnf.retain(|clause| !clause.iter().any(|lit| pures.contains(lit)));
199
200        current_cnf
201    }
202}
203
204/// Preprocessor for Subsumption Elimination.
205///
206/// A clause C1 subsumes another clause C2 if all literals of C1 are also present in C2
207/// (i.e. C1 is a sub-clause of C2). If C1 subsumes C2, C2 is redundant and can be removed.
208/// For example, if `(x V y)` exists, then `(x V y V z)` is subsumed.
209///
210/// This implementation assumes literals within clauses are sorted for efficient checking.
211#[derive(Clone, Copy, Default, Debug, PartialEq, Eq)]
212pub struct SubsumptionElimination;
213
214impl SubsumptionElimination {
215    /// Checks if `clause_potential_subsumer` subsumes `clause_potential_subsumed`.
216    /// Assumes literals within each clause are sorted by some consistent criteria
217    /// (e.g. by `Literal::index()` or by `Variable` then `polarity`).
218    ///
219    /// `clause_potential_subsumer` subsumes `clause_potential_subsumed` if
220    /// `literals(clause_potential_subsumer)` is a subset of `literals(clause_potential_subsumed)`.
221    ///
222    fn is_subset_of<L: Literal, S: LiteralStorage<L>>(
223        shorter_clause: &Clause<L, S>,
224        longer_clause: &Clause<L, S>,
225    ) -> bool {
226        if shorter_clause.len() > longer_clause.len() {
227            return false; // cannot be a subset if it's longer.
228        }
229        if shorter_clause.is_empty() {
230            return true; // empty set is a subset of any set.
231        }
232
233        let mut shorter_iter = shorter_clause.iter();
234        let mut longer_iter = longer_clause.iter();
235
236        let mut current_shorter = shorter_iter.next();
237        let mut current_longer = longer_iter.next();
238
239        while let Some(s_lit) = current_shorter {
240            while let Some(l_lit) = current_longer {
241                if l_lit < s_lit {
242                    current_longer = longer_iter.next();
243                } else {
244                    break;
245                }
246            }
247
248            match current_longer {
249                Some(l_lit) if l_lit == s_lit => {
250                    current_shorter = shorter_iter.next();
251                    current_longer = longer_iter.next();
252                }
253                _ => {
254                    return false;
255                }
256            }
257        }
258        true
259    }
260}
261
262impl<L: Literal, S: LiteralStorage<L>> Preprocessor<L, S> for SubsumptionElimination {
263    /// Applies subsumption elimination.
264    ///
265    /// Iterates through pairs of clauses. If clause `C_i` subsumes `C_j`, `C_j` is marked for removal.
266    /// To optimise, clauses are typically sorted by length first: a shorter clause can subsume
267    /// a longer one, but not vice versa.
268    fn preprocess(&self, cnf: &[Clause<L, S>]) -> Vec<Clause<L, S>> {
269        let result = cnf.to_vec();
270        if result.len() < 2 {
271            return result;
272        }
273
274        let mut sorted_indices: Vec<usize> = (0..result.len()).collect();
275        sorted_indices.sort_by_key(|&i| result[i].len());
276
277        let mut removed_flags = vec![false; result.len()];
278        let mut num_removed = 0;
279
280        for i in 0..sorted_indices.len() {
281            let idx_i = sorted_indices[i];
282            if removed_flags[idx_i] {
283                continue;
284            }
285
286            for &idx_j in sorted_indices.iter().skip(i + 1) {
287                if removed_flags[idx_j] {
288                    continue;
289                }
290
291                if Self::is_subset_of(&result[idx_i], &result[idx_j]) {
292                    removed_flags[idx_j] = true;
293                    num_removed += 1;
294                }
295            }
296        }
297
298        if num_removed == 0 {
299            return result;
300        }
301
302        let mut final_clauses = Vec::with_capacity(result.len() - num_removed);
303        for i in 0..result.len() {
304            if !removed_flags[i] {
305                final_clauses.push(result[i].clone());
306            }
307        }
308        final_clauses
309    }
310}
311
312/// Preprocessor for Tautology Elimination.
313///
314/// A tautological clause is a clause that contains both a literal and its negation
315/// (e.g. `x V !x V y`). Such clauses are always true and can be removed from a CNF
316/// formula without changing its satisfiability.
317#[derive(Clone, Copy, Default, Debug, PartialEq, Eq)]
318pub struct TautologyElimination;
319
320impl<L: Literal, S: LiteralStorage<L>> Preprocessor<L, S> for TautologyElimination {
321    /// Applies tautology elimination.
322    ///
323    /// Filters out any clause `c` for which `c.is_tautology()` is true.
324    fn preprocess(&self, cnf: &[Clause<L, S>]) -> Vec<Clause<L, S>> {
325        cnf.iter()
326            .filter(|clause| !clause.is_tautology())
327            .cloned()
328            .collect()
329    }
330}
331
332#[cfg(test)]
333mod tests {
334    use super::*;
335    use crate::sat::literal::PackedLiteral;
336    use smallvec::SmallVec;
337
338    type TestLiteral = PackedLiteral;
339    type TestClauseStorage = SmallVec<[TestLiteral; 8]>;
340    type TestClause = Clause<TestLiteral, TestClauseStorage>;
341
342    fn lit(val: i32) -> TestLiteral {
343        TestLiteral::from_i32(val)
344    }
345    fn clause(lits_dimacs: &[i32]) -> TestClause {
346        lits_dimacs.iter().map(|&x| lit(x)).collect()
347    }
348    fn clause_sorted(lits_dimacs: &[i32]) -> TestClause {
349        let mut l: Vec<TestLiteral> = lits_dimacs.iter().map(|&x| lit(x)).collect();
350        l.sort();
351        Clause::new(&l)
352    }
353
354    #[test]
355    fn test_tautology_elimination() {
356        let preprocessor = TautologyElimination;
357        let cnf = vec![clause(&[1, -1, 2]), clause(&[1, 2]), clause(&[-2, 3, 2])];
358        let processed_cnf = preprocessor.preprocess(&cnf);
359        assert_eq!(processed_cnf.len(), 1);
360        assert_eq!(processed_cnf[0], clause(&[1, 2]));
361    }
362
363    #[test]
364    fn test_pure_literal_elimination() {
365        let preprocessor = PureLiteralElimination;
366        let cnf = vec![clause(&[1, 2]), clause(&[-2, -3]), clause(&[2, -2])];
367        let processed_cnf = preprocessor.preprocess(&cnf);
368        assert_eq!(processed_cnf.len(), 1);
369        assert_eq!(processed_cnf[0], clause(&[2, -2]));
370
371        let cnf_no_pures = vec![clause(&[1, 2]), clause(&[-1, -2])];
372        let processed_no_pures = preprocessor.preprocess(&cnf_no_pures);
373        assert_eq!(processed_no_pures.len(), 2);
374
375        let cnf_iterative = vec![clause(&[1]), clause(&[1, -2]), clause(&[2])];
376        let processed_iterative_pass1 = preprocessor.preprocess(&cnf_iterative);
377        assert_eq!(processed_iterative_pass1.len(), 1);
378        assert_eq!(processed_iterative_pass1[0], clause(&[2]));
379
380        let processed_iterative_pass2 = preprocessor.preprocess(&processed_iterative_pass1);
381        assert_eq!(processed_iterative_pass2.len(), 0);
382    }
383
384    #[test]
385    fn test_subsumption_elimination() {
386        let preprocessor = SubsumptionElimination;
387        let cnf = vec![
388            clause_sorted(&[1, 2]),
389            clause_sorted(&[1, 2, 3]),
390            clause_sorted(&[1, 3]),
391            clause_sorted(&[1]),
392            clause_sorted(&[4, 5]),
393        ];
394        let processed_cnf = preprocessor.preprocess(&cnf);
395
396        let expected_cnf = [clause_sorted(&[1]), clause_sorted(&[4, 5])];
397        let processed_set: FxHashSet<_> = processed_cnf.iter().cloned().collect();
398        let expected_set: FxHashSet<_> = expected_cnf.iter().cloned().collect();
399        assert_eq!(processed_set, expected_set);
400        assert_eq!(processed_cnf.len(), 2);
401    }
402
403    #[test]
404    fn test_preprocessor_chain() {
405        let cnf_initial = vec![clause(&[1, -1, 2]), clause(&[1, 2, 3]), clause(&[1, 2])];
406
407        let chain = PreprocessorChain::new()
408            .add_preprocessor(TautologyElimination)
409            .add_preprocessor(PureLiteralElimination)
410            .add_preprocessor(SubsumptionElimination);
411
412        let processed_cnf = chain.preprocess(&cnf_initial);
413        assert_eq!(processed_cnf.len(), 0);
414    }
415}