modus_lib/
translate.rs

1// Modus, a language for building container images
2// Copyright (C) 2022 University College London
3
4// This program is free software: you can redistribute it and/or modify
5// it under the terms of the GNU Affero General Public License as
6// published by the Free Software Foundation, either version 3 of the
7// License, or (at your option) any later version.
8
9// This program is distributed in the hope that it will be useful,
10// but WITHOUT ANY WARRANTY; without even the implied warranty of
11// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12// GNU Affero General Public License for more details.
13
14// You should have received a copy of the GNU Affero General Public License
15// along with this program.  If not, see <https://www.gnu.org/licenses/>.
16
17use std::{collections::HashMap, sync::atomic::AtomicUsize};
18
19use crate::{
20    logic::{self, IRTerm, Predicate, SpannedPosition},
21    modusfile::{
22        self, parser::process_raw_string, Expression, FormatStringFragment, ModusClause, ModusTerm,
23    },
24    sld::{self, Auxiliary},
25};
26
27/// Returns an IRTerm to be used instead of the format string term, and a list of literals
28/// needed to make this equivalent.
29fn convert_format_string(
30    spanned_position: &SpannedPosition,
31    fragments: &Vec<FormatStringFragment>,
32) -> (Vec<logic::Literal>, IRTerm) {
33    let concat_predicate = logic::Predicate("string_concat".to_string());
34    let mut prev_variable: IRTerm = Auxiliary::aux(false);
35    let mut new_literals = vec![];
36
37    let f_string_start = spanned_position.offset + 2;
38
39    match fragments.first() {
40        Some(FormatStringFragment::StringContent(span, s)) => new_literals.push(logic::Literal {
41            positive: true,
42            position: Some(SpannedPosition {
43                offset: f_string_start,
44                length: span.offset + span.length - f_string_start,
45            }),
46            predicate: concat_predicate.clone(),
47            args: vec![
48                IRTerm::Constant("".to_owned()),
49                IRTerm::Constant(process_raw_string(&s).replace("\\$", "$")),
50                prev_variable.clone(),
51            ],
52        }),
53        Some(FormatStringFragment::InterpolatedVariable(span, v)) => {
54            new_literals.push(logic::Literal {
55                positive: true,
56                position: Some(SpannedPosition {
57                    offset: f_string_start,
58                    length: span.offset + span.length - f_string_start,
59                }),
60                predicate: concat_predicate.clone(),
61                args: vec![
62                    IRTerm::Constant("".to_owned()),
63                    IRTerm::UserVariable(v.to_string()),
64                    prev_variable.clone(),
65                ],
66            })
67        }
68        None => (),
69    }
70
71    if fragments.len() > 1 {
72        // For example, if the last var we created was v1 and we just parsed some constant
73        // string c, we add a literal `string_concat(v1, c, v2)`, creating a new variable v2.
74        for fragment in &fragments[1..] {
75            let new_var: IRTerm = Auxiliary::aux(false);
76            let (span, new_term) = match fragment {
77                FormatStringFragment::StringContent(span, s) => (
78                    span,
79                    IRTerm::Constant(process_raw_string(&s).replace("\\$", "$")),
80                ),
81                FormatStringFragment::InterpolatedVariable(span, v) => {
82                    (span, IRTerm::UserVariable(v.to_string()))
83                }
84            };
85            new_literals.push(logic::Literal {
86                positive: true,
87                // NOTE: these spans are from the start of the f-string, since that what
88                // the string_concat represet.
89                // In case, diagnostics are unclear, we could make the spans represent the
90                // disjoint string fragments instead.
91                position: Some(SpannedPosition {
92                    offset: f_string_start,
93                    length: span.offset + span.length - f_string_start,
94                }),
95                predicate: concat_predicate.clone(),
96                args: vec![prev_variable, new_term, new_var.clone()],
97            });
98            prev_variable = new_var;
99        }
100    }
101
102    (
103        new_literals,
104        // special case of empty f-string is converted to constant
105        if fragments.len() == 0 {
106            IRTerm::Constant("".into())
107        } else {
108            prev_variable
109        },
110    )
111}
112
113static OPERATOR_PAIR_ID: AtomicUsize = AtomicUsize::new(0);
114
115#[cfg(test)]
116pub(crate) fn reset_operator_pair_id() {
117    OPERATOR_PAIR_ID.store(0, std::sync::atomic::Ordering::SeqCst);
118}
119
120/// Used to generate unique predicate names in literals that replace negated expressions.
121static NEGATION_LITERAL_ID: AtomicUsize = AtomicUsize::new(0);
122
123pub(crate) fn fetch_add_negation_literal_id() -> usize {
124    NEGATION_LITERAL_ID.fetch_add(1, std::sync::atomic::Ordering::SeqCst)
125}
126
127#[cfg(test)]
128pub(crate) fn reset_negation_literal_id() {
129    NEGATION_LITERAL_ID.store(0, std::sync::atomic::Ordering::SeqCst);
130}
131
132/// Takes a ModusTerm and converts it to an IRTerm.
133///
134/// If any additional constraints are needed, such as when the term is a format
135/// string, the logic predicates are returned in a vector. They need to be added
136/// alongside whatever predicate is using this term.
137fn translate_term(t: &ModusTerm) -> (IRTerm, Vec<logic::Literal>) {
138    match t {
139        ModusTerm::Constant(c) => (IRTerm::Constant(process_raw_string(c)), Vec::new()),
140        ModusTerm::FormatString {
141            position,
142            fragments,
143        } => {
144            let (new_literals, new_var) = convert_format_string(position, fragments);
145            (new_var, new_literals)
146        }
147        ModusTerm::UserVariable(v) => (IRTerm::UserVariable(v.to_owned()), Vec::new()),
148        ModusTerm::AnonymousVariable => (sld::Auxiliary::aux(true), Vec::new()),
149        ModusTerm::Array(_, ts) => {
150            let mut new_terms = Vec::new();
151            let mut new_literals = Vec::new();
152            for term in ts {
153                let (new_term, new_lits) = translate_term(term);
154                new_terms.push(new_term);
155                new_literals.extend(new_lits);
156            }
157            (IRTerm::Array(new_terms), new_literals)
158        }
159    }
160}
161
162/// Replaces negation on expressions with literals and new clauses.
163fn handle_negation(modus_clause: &modusfile::ModusClause) -> Vec<modusfile::ModusClause> {
164    fn handle_expression(
165        expr: &modusfile::Expression,
166        clauses: &mut Vec<modusfile::ModusClause>,
167    ) -> modusfile::Expression {
168        match expr {
169            Expression::Literal(l) => Expression::Literal(l.clone()),
170            Expression::OperatorApplication(s, e, op) => Expression::OperatorApplication(
171                s.clone(),
172                Box::new(handle_expression(e, clauses)),
173                op.clone(),
174            ),
175            Expression::And(s, true, e1, e2) => Expression::And(
176                s.clone(),
177                true,
178                Box::new(handle_expression(e1, clauses)),
179                Box::new(handle_expression(e2, clauses)),
180            ),
181            Expression::Or(s, true, e1, e2) => Expression::Or(
182                s.clone(),
183                true,
184                Box::new(handle_expression(e1, clauses)),
185                Box::new(handle_expression(e2, clauses)),
186            ),
187
188            Expression::And(s, false, _, _) | Expression::Or(s, false, _, _) => {
189                let new_negate_literal = logic::Literal {
190                    positive: true,
191                    position: None,
192                    predicate: Predicate(format!("_negate_{}", fetch_add_negation_literal_id())),
193                    args: vec![], // will need to expose the variables later
194                };
195                let new_clause = modusfile::ModusClause {
196                    head: new_negate_literal.clone(),
197                    body: Some(expr.negate_current()),
198                };
199
200                clauses.extend(handle_negation(&new_clause));
201                Expression::Literal(logic::Literal {
202                    positive: false,
203                    position: s.clone(),
204                    ..new_negate_literal
205                })
206            }
207        }
208    }
209
210    let mut clauses = Vec::new();
211    let new_clause = modusfile::ModusClause {
212        head: modus_clause.head.clone(),
213        body: modus_clause
214            .body
215            .as_ref()
216            .map(|e| handle_expression(e, &mut clauses)),
217    };
218    clauses.push(new_clause);
219    clauses
220}
221
222impl From<&crate::modusfile::ModusClause> for Vec<logic::Clause> {
223    /// Convert a ModusClause into one supported by the IR.
224    /// It converts logical or/; into multiple rules, which should be equivalent.
225    fn from(modus_clause: &crate::modusfile::ModusClause) -> Self {
226        fn handle_clause(modus_clause: &modusfile::ModusClause) -> Vec<logic::Clause> {
227            match &modus_clause.body {
228                Some(Expression::Literal(l)) => {
229                    let mut literals: Vec<logic::Literal> = Vec::new();
230                    let mut new_literal_args: Vec<logic::IRTerm> = Vec::new();
231
232                    for arg in &l.args {
233                        let (translated_arg, new_literals) = translate_term(arg);
234                        new_literal_args.push(translated_arg);
235                        literals.extend_from_slice(&new_literals);
236                    }
237                    literals.push(logic::Literal {
238                        positive: l.positive,
239                        position: l.position.clone(),
240                        predicate: l.predicate.clone(),
241                        args: new_literal_args,
242                    });
243
244                    vec![logic::Clause {
245                        head: modus_clause.head.clone().into(),
246                        body: literals,
247                    }]
248                }
249
250                Some(Expression::OperatorApplication(_, expr, op)) => handle_clause(&ModusClause {
251                    head: modus_clause.head.clone(),
252                    body: Some(*expr.clone()),
253                })
254                .into_iter()
255                .map(|c| {
256                    let mut body = Vec::with_capacity(c.body.len() + 2);
257                    let mut op_args = Vec::with_capacity(op.args.len() + 1);
258                    let id = OPERATOR_PAIR_ID.fetch_add(1, std::sync::atomic::Ordering::SeqCst);
259                    op_args.push(IRTerm::Constant(id.to_string()));
260                    op_args.extend(op.args.iter().map(|t| {
261                        let (t, nl) = translate_term(t);
262                        body.extend_from_slice(&nl);
263                        t
264                    }));
265                    body.push(logic::Literal {
266                        positive: true,
267                        position: op.position.clone(),
268                        predicate: Predicate(format!("_operator_{}_begin", &op.predicate.0)),
269                        args: op_args.clone(),
270                    });
271                    body.extend_from_slice(&c.body);
272                    body.push(logic::Literal {
273                        positive: true,
274                        position: op.position.clone(),
275                        predicate: Predicate(format!("_operator_{}_end", &op.predicate.0)),
276                        args: op_args,
277                    });
278                    logic::Clause {
279                        head: c.head.clone(),
280                        body,
281                    }
282                })
283                .collect(),
284
285                Some(Expression::And(_, true, expr1, expr2)) => {
286                    let c1 = handle_clause(&ModusClause {
287                        head: modus_clause.head.clone(),
288                        body: Some(*expr1.clone()),
289                    });
290                    let c2 = handle_clause(&ModusClause {
291                        head: modus_clause.head.clone(),
292                        body: Some(*expr2.clone()),
293                    });
294
295                    let mut clauses = Vec::new();
296                    // If we have the possible rules for left and right sub expressions,
297                    // consider the cartesian product of them.
298                    for clause1 in &c1 {
299                        for clause2 in &c2 {
300                            clauses.push(logic::Clause {
301                                head: clause1.head.clone(),
302                                body: clause1
303                                    .body
304                                    .clone()
305                                    .into_iter()
306                                    .chain(clause2.body.clone().into_iter())
307                                    .collect(),
308                            })
309                        }
310                    }
311                    clauses
312                }
313
314                Some(Expression::Or(_, true, expr1, expr2)) => {
315                    let mut c1 = handle_clause(&ModusClause {
316                        head: modus_clause.head.clone(),
317                        body: Some(*expr1.clone()),
318                    });
319                    let mut c2 = handle_clause(&ModusClause {
320                        head: modus_clause.head.clone(),
321                        body: Some(*expr2.clone()),
322                    });
323
324                    c1.append(&mut c2);
325                    c1
326                }
327
328                // negated expression pairs should be handled in a separate pass
329                Some(Expression::And(_, false, _, _)) | Some(Expression::Or(_, false, _, _)) => {
330                    unreachable!()
331                }
332
333                None => vec![logic::Clause {
334                    head: modus_clause.head.clone().into(),
335                    body: Vec::new(),
336                }],
337            }
338        }
339
340        /// Converts clauses like `_negate_id :- ...` into `_negate_id(X, Y) :- ...`.
341        /// But doesn't expose anonymous variables.
342        fn exposed_negate_clauses(ir_clauses: Vec<logic::Clause>) -> Vec<logic::Clause> {
343            let mut negated_lit_args: HashMap<&Predicate, Vec<IRTerm>> = HashMap::new();
344            for ir_clause in &ir_clauses {
345                if ir_clause.head.predicate.0.starts_with("_negate_") {
346                    let curr_args = negated_lit_args
347                        .entry(&ir_clause.head.predicate)
348                        .or_insert(Vec::new());
349                    let new_args = ir_clause.variables(false);
350                    for arg in new_args {
351                        if !curr_args.contains(&arg) {
352                            curr_args.push(arg);
353                        }
354                    }
355                }
356            }
357
358            ir_clauses
359                .iter()
360                .cloned()
361                .map(|clause| logic::Clause {
362                    head: logic::Literal {
363                        args: negated_lit_args
364                            .get(&clause.head.predicate)
365                            .map(|xs| {
366                                let mut v: Vec<logic::IRTerm> = xs.to_vec();
367                                v.sort();
368                                v
369                            })
370                            .unwrap_or(clause.head.args),
371                        ..clause.head
372                    },
373                    body: clause
374                        .body
375                        .iter()
376                        .cloned()
377                        .map(|lit| {
378                            if let Some(args) = negated_lit_args.get(&lit.predicate) {
379                                logic::Literal {
380                                    args: {
381                                        let mut v: Vec<logic::IRTerm> = args.to_vec();
382                                        v.sort();
383                                        v
384                                    },
385                                    ..lit
386                                }
387                            } else {
388                                lit
389                            }
390                        })
391                        .collect(),
392                })
393                .collect()
394        }
395
396        // convert negated expressions into negated literals, then perform translation as normal
397        let without_expr_negation = handle_negation(modus_clause);
398        let ir_clauses: Vec<logic::Clause> = without_expr_negation
399            .iter()
400            .flat_map(handle_clause)
401            .collect();
402
403        // We perform this exactly twice because of nested negation.
404        // The second time would include variables that are newly exposed in _negate_id.
405        // This isn't a fixpoint, it should require exactly two calls.
406        let exposed_1 = exposed_negate_clauses(ir_clauses);
407        exposed_negate_clauses(exposed_1)
408    }
409}
410
411pub fn translate_modusfile(mf: &modusfile::Modusfile) -> Vec<logic::Clause> {
412    mf.0.iter().flat_map(Vec::from).collect()
413}
414
415#[cfg(test)]
416mod tests {
417    use crate::logic::SpannedPosition;
418    use serial_test::serial;
419
420    use super::*;
421
422    /// Should be called if any tests rely on the variable index.
423    /// Note that the code (currently) doesn't rely on the variable indexes, just the tests, for convenience.
424    fn setup() {
425        logic::AVAILABLE_VARIABLE_INDEX.store(0, std::sync::atomic::Ordering::SeqCst);
426        reset_negation_literal_id();
427    }
428
429    #[test]
430    fn translate_constant_term() {
431        let inp1 = r#"Hello\nWorld"#;
432        let modus_term1 = ModusTerm::Constant(inp1.to_owned());
433        let ir_term = IRTerm::Constant("Hello\nWorld".to_owned());
434
435        assert_eq!(ir_term, translate_term(&modus_term1).0)
436    }
437
438    #[test]
439    #[serial]
440    fn format_string_empty() {
441        setup();
442
443        let case = Vec::new();
444
445        let lits = vec![];
446
447        assert_eq!(
448            (lits, IRTerm::Constant("".to_string())),
449            convert_format_string(
450                &SpannedPosition {
451                    offset: 0,
452                    length: 3,
453                },
454                &case
455            )
456        );
457    }
458
459    #[test]
460    #[serial]
461    fn format_string_translation() {
462        setup();
463
464        let term: ModusTerm = "f\"${ target_folder }/buildkit-frontend\"".parse().unwrap();
465        let (span, fragments) = if let ModusTerm::FormatString {
466            position,
467            fragments,
468        } = term
469        {
470            (position, fragments)
471        } else {
472            panic!("term should be f-string")
473        };
474
475        let lits = vec![
476            logic::Literal {
477                positive: true,
478                position: Some(SpannedPosition {
479                    offset: 2,
480                    length: 16,
481                }),
482                predicate: logic::Predicate("string_concat".to_owned()),
483                args: vec![
484                    IRTerm::Constant("".into()),
485                    IRTerm::UserVariable("target_folder".to_owned()),
486                    IRTerm::AuxiliaryVariable(0),
487                ],
488            },
489            logic::Literal {
490                positive: true,
491                position: Some(SpannedPosition {
492                    offset: 2,
493                    length: 36,
494                }),
495                predicate: logic::Predicate("string_concat".to_owned()),
496                args: vec![
497                    IRTerm::AuxiliaryVariable(0),
498                    IRTerm::Constant("/buildkit-frontend".to_owned()),
499                    IRTerm::AuxiliaryVariable(1),
500                ],
501            },
502        ];
503
504        assert_eq!(
505            (lits, IRTerm::AuxiliaryVariable(1)),
506            convert_format_string(&span, &fragments)
507        );
508    }
509
510    #[test]
511    #[serial]
512    fn format_string_translation_with_escape() {
513        setup();
514
515        let term: ModusTerm = r#"f"use \"${feature}\" like this \${...} \
516                                   foobar""#
517            .parse()
518            .unwrap();
519        let (span, fragments) = if let ModusTerm::FormatString {
520            position,
521            fragments,
522        } = term
523        {
524            (position, fragments)
525        } else {
526            panic!("term should be f-string")
527        };
528
529        let lits = vec![
530            logic::Literal {
531                positive: true,
532                position: Some(SpannedPosition {
533                    offset: 2,
534                    length: 6,
535                }),
536                predicate: logic::Predicate("string_concat".to_owned()),
537                args: vec![
538                    IRTerm::Constant("".into()),
539                    IRTerm::Constant("use \"".to_owned()),
540                    IRTerm::AuxiliaryVariable(0),
541                ],
542            },
543            logic::Literal {
544                positive: true,
545                position: Some(SpannedPosition {
546                    offset: 2,
547                    length: 15,
548                }),
549                predicate: logic::Predicate("string_concat".to_owned()),
550                args: vec![
551                    IRTerm::AuxiliaryVariable(0),
552                    IRTerm::UserVariable("feature".to_owned()),
553                    IRTerm::AuxiliaryVariable(1),
554                ],
555            },
556            logic::Literal {
557                positive: true,
558                position: Some(SpannedPosition {
559                    offset: 2,
560                    length: 80,
561                }),
562                predicate: logic::Predicate("string_concat".to_owned()),
563                args: vec![
564                    IRTerm::AuxiliaryVariable(1),
565                    IRTerm::Constant("\" like this ${...} foobar".to_owned()),
566                    IRTerm::AuxiliaryVariable(2),
567                ],
568            },
569        ];
570
571        assert_eq!(
572            (lits, IRTerm::AuxiliaryVariable(2)),
573            convert_format_string(&span, &fragments)
574        );
575    }
576
577    #[test]
578    #[serial]
579    fn translates_negated_and() {
580        setup();
581
582        let modus_clause: ModusClause = "foo :- !(a, b, c).".parse().unwrap();
583        let expected: Vec<logic::Clause> = vec![
584            "_negate_0 :- a, b, c.".parse().unwrap(),
585            "foo :- !_negate_0.".parse().unwrap(),
586        ];
587
588        let actual: Vec<logic::Clause> = (&modus_clause).into();
589        assert_eq!(expected.len(), actual.len());
590        assert!(expected
591            .iter()
592            .zip(actual)
593            .all(|(a, b)| a.eq_ignoring_position(&b)));
594    }
595
596    #[test]
597    #[serial]
598    fn translates_negated_or() {
599        setup();
600
601        let modus_clause: ModusClause = "foo :- !(a; b; c).".parse().unwrap();
602        let expected: Vec<logic::Clause> = vec![
603            "_negate_0 :- a.".parse().unwrap(),
604            "_negate_0 :- b.".parse().unwrap(),
605            "_negate_0 :- c.".parse().unwrap(),
606            "foo :- !_negate_0.".parse().unwrap(),
607        ];
608
609        let actual: Vec<logic::Clause> = (&modus_clause).into();
610        assert_eq!(expected.len(), actual.len());
611        assert!(expected
612            .iter()
613            .zip(actual)
614            .all(|(a, b)| a.eq_ignoring_position(&b)));
615    }
616
617    #[test]
618    #[serial]
619    fn translated_negated_with_variable() {
620        setup();
621
622        let modus_clause: ModusClause = "foo :- !(a(X), b(X)), x(X).".parse().unwrap();
623        let expected: Vec<logic::Clause> = vec![
624            "_negate_0(X) :- a(X), b(X).".parse().unwrap(),
625            "foo :- !_negate_0(X), x(X).".parse().unwrap(),
626        ];
627
628        let actual: Vec<logic::Clause> = (&modus_clause).into();
629        assert_eq!(expected.len(), actual.len());
630        assert!(expected
631            .iter()
632            .zip(actual)
633            .all(|(a, b)| a.eq_ignoring_position(&b)));
634    }
635
636    #[test]
637    #[serial]
638    fn translation_negation_with_anonymous() {
639        setup();
640
641        let modus_clause: ModusClause = "foo :- !(a(X, _) ; b(X, Y)), x(X).".parse().unwrap();
642        let expected: Vec<logic::Clause> = vec![
643            logic::Clause {
644                head: "_negate_0(X, Y)".parse().unwrap(),
645                body: vec![logic::Literal {
646                    positive: true,
647                    position: None,
648                    predicate: Predicate("a".into()),
649                    args: vec![
650                        IRTerm::UserVariable("X".into()),
651                        IRTerm::AnonymousVariable(0),
652                    ],
653                }],
654            },
655            "_negate_0(X, Y) :- b(X, Y).".parse().unwrap(),
656            "foo :- !_negate_0(X, Y), x(X).".parse().unwrap(),
657        ];
658
659        let actual: Vec<logic::Clause> = (&modus_clause).into();
660        assert_eq!(expected.len(), actual.len());
661        assert!(expected
662            .iter()
663            .zip(actual)
664            .all(|(a, b)| a.eq_ignoring_position(&b)));
665    }
666
667    #[test]
668    #[serial]
669    fn translation_nested_negation() {
670        setup();
671
672        let modus_clause: ModusClause = "foo :- !(a(Z) , !(b(X), c(Y))), x(X).".parse().unwrap();
673        let expected: Vec<logic::Clause> = vec![
674            "_negate_1(X, Y) :- b(X), c(Y).".parse().unwrap(),
675            "_negate_0(X, Y, Z) :- a(Z), !_negate_1(X, Y)."
676                .parse()
677                .unwrap(),
678            "foo :- !_negate_0(X, Y, Z), x(X).".parse().unwrap(),
679        ];
680
681        let actual: Vec<logic::Clause> = (&modus_clause).into();
682        assert_eq!(expected.len(), actual.len());
683        assert!(expected
684            .iter()
685            .zip(actual)
686            .all(|(a, b)| a.eq_ignoring_position(&b)));
687    }
688
689    #[test]
690    #[serial]
691    fn translates_anonymous_variable() {
692        setup();
693
694        let modus_clause: ModusClause = "foo(\"bar\", _).".parse().unwrap();
695        let expected: Vec<logic::Clause> = vec![logic::Clause {
696            head: logic::Literal {
697                positive: true,
698                position: None,
699                predicate: Predicate("foo".into()),
700                args: vec![
701                    IRTerm::Constant("bar".to_string()),
702                    IRTerm::AnonymousVariable(0),
703                ],
704            },
705            body: vec![],
706        }];
707        let actual: Vec<logic::Clause> = (&modus_clause).into();
708
709        for (a, b) in expected.iter().zip(actual) {
710            assert!(a.eq_ignoring_position(&b), "{} {}", a, b);
711        }
712    }
713}