modus_lib/
imagegen.rs

1// Modus, a language for building container images
2// Copyright (C) 2022 University College London
3
4// This program is free software: you can redistribute it and/or modify
5// it under the terms of the GNU Affero General Public License as
6// published by the Free Software Foundation, either version 3 of the
7// License, or (at your option) any later version.
8
9// This program is distributed in the hope that it will be useful,
10// but WITHOUT ANY WARRANTY; without even the implied warranty of
11// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12// GNU Affero General Public License for more details.
13
14// You should have received a copy of the GNU Affero General Public License
15// along with this program.  If not, see <https://www.gnu.org/licenses/>.
16
17use std::collections::{HashMap, HashSet};
18use std::iter::{self, FromIterator};
19use std::path::{Path, PathBuf};
20
21use crate::analysis::{Kind, ModusSemantics};
22use crate::logic::{Clause, IRTerm, Literal, Predicate};
23use crate::modusfile::{self, Modusfile};
24use crate::sld::{self, ClauseId, Proof, ResolutionError};
25use crate::translate::translate_modusfile;
26use crate::unification::Substitute;
27
28use codespan_reporting::diagnostic::Diagnostic;
29use serde::{Deserialize, Serialize};
30
31const MODUS_LABEL: &str = "com.modus-continens.literal";
32
33/// A build plan, designed to be easy to translate to buildkit and Dockerfile.
34#[derive(Debug, Clone, Serialize, Deserialize)]
35pub struct BuildPlan {
36    pub nodes: Vec<BuildNode>,
37    pub dependencies: Vec<Vec<NodeId>>,
38    pub outputs: Vec<Output>,
39}
40
41impl BuildPlan {
42    pub fn new() -> BuildPlan {
43        BuildPlan {
44            nodes: Vec::new(),
45            dependencies: Vec::new(),
46            outputs: Vec::new(),
47        }
48    }
49
50    pub fn new_node(&mut self, node: BuildNode, deps: Vec<NodeId>) -> NodeId {
51        let id = self.nodes.len();
52        self.nodes.push(node);
53        self.dependencies.push(
54            HashSet::<_>::from_iter(deps.into_iter())
55                .into_iter()
56                .collect(),
57        );
58        debug_assert_eq!(self.nodes.len(), self.dependencies.len());
59        id
60    }
61
62    /// Return an ordering of nodes in which dependencies of a node comes before
63    /// the node itself.
64    pub fn topological_order(&self) -> Vec<NodeId> {
65        let mut topological_order = Vec::with_capacity(self.nodes.len());
66        let mut seen = vec![false; self.nodes.len()];
67        fn dfs(
68            plan: &BuildPlan,
69            node: NodeId,
70            topological_order: &mut Vec<NodeId>,
71            seen: &mut Vec<bool>,
72        ) {
73            if seen[node] {
74                return;
75            }
76            for &deps in plan.dependencies[node].iter() {
77                dfs(plan, deps, topological_order, seen);
78            }
79            topological_order.push(node);
80            seen[node] = true;
81        }
82        for output in self.outputs.iter() {
83            dfs(&self, output.node, &mut topological_order, &mut seen);
84        }
85        topological_order
86    }
87}
88
89#[derive(Debug)]
90struct State {
91    current_node: Option<NodeId>,
92    cwd: String,
93    current_merge: Option<MergeNode>,
94    additional_envs: HashMap<String, String>,
95}
96
97impl State {
98    fn with_new_cwd<F: FnOnce(&mut Self)>(&mut self, new_cwd: String, f: F) {
99        let old_cwd = std::mem::replace(&mut self.cwd, new_cwd);
100        f(self);
101        self.cwd = old_cwd;
102    }
103
104    fn with_new_merge<F: FnOnce(&mut Self)>(&mut self, new_merge: MergeNode, f: F) -> MergeNode {
105        debug_assert!(self.current_merge.is_none());
106        self.current_merge = Some(new_merge);
107        f(self);
108        self.current_merge.take().unwrap()
109    }
110
111    fn has_base(&self) -> bool {
112        self.current_merge.is_some() || self.current_node.is_some()
113    }
114
115    fn set_node(&mut self, node: NodeId) {
116        debug_assert!(self.current_merge.is_none());
117        self.current_node = Some(node);
118    }
119
120    fn with_additional_envs<E: IntoIterator<Item = (String, String)>, F: FnOnce(&mut Self)>(
121        &mut self,
122        envs: E,
123        f: F,
124    ) {
125        let old_envs = self.additional_envs.clone();
126        self.additional_envs.extend(envs);
127        f(self);
128        self.additional_envs = old_envs;
129    }
130}
131
132pub type NodeId = usize;
133
134/// Represent one operation, such as `RUN` or `FROM`.
135///
136/// Think of it as one line of a Dockerfile, or one node in the buildkit graph.
137///
138/// ## Paths
139///
140/// All the paths in this structure can either be absolute or relative path. In
141/// the case of relative paths, they are ALWAYS relative to the working
142/// directory of the parent image (as stored in the image config). Translators
143/// from this to e.g. buildkit LLB should resolve the paths as necessary.
144///
145/// In the case of copy, src_path and dst_path should be resolved relative to
146/// the source image's workdir and the destination (parent) image's workdir,
147/// respectively.
148///
149/// TODO: add caching control
150#[derive(Debug, Clone, Serialize, Deserialize)]
151pub enum BuildNode {
152    From {
153        /// The actual image reference to use. Probably a resolved hash.
154        image_ref: String,
155        /// What user specified initially, such as "alpine".
156        display_name: String,
157    },
158    FromScratch {
159        /// A hack, inserted by buildkit.rs See buildkit_frontend.rs for documentation
160        scratch_ref: Option<String>,
161    },
162    Run {
163        parent: NodeId,
164        command: String,
165        cwd: String,
166        additional_envs: HashMap<String, String>,
167    },
168    CopyFromImage {
169        parent: NodeId,
170        src_image: NodeId,
171        src_path: String,
172        dst_path: String,
173    },
174    CopyFromLocal {
175        parent: NodeId,
176        src_path: String,
177        dst_path: String,
178    },
179    SetWorkdir {
180        parent: NodeId,
181        new_workdir: String,
182    },
183    SetEntrypoint {
184        parent: NodeId,
185        new_entrypoint: Vec<String>,
186    },
187    SetCmd {
188        parent: NodeId,
189        new_cmd: Vec<String>,
190    },
191    SetLabel {
192        parent: NodeId,
193        label: String,
194        value: String,
195    },
196    Merge(MergeNode),
197    SetEnv {
198        parent: NodeId,
199        key: String,
200        value: String,
201    },
202    AppendEnvValue {
203        parent: NodeId,
204        key: String,
205        value: String,
206    },
207    SetUser {
208        parent: NodeId,
209        user: String,
210    },
211}
212
213#[derive(Debug, Clone, Serialize, Deserialize)]
214pub struct MergeNode {
215    pub parent: NodeId,
216    pub operations: Vec<MergeOperation>,
217}
218
219#[derive(Debug, Clone, Serialize, Deserialize)]
220pub enum MergeOperation {
221    Run {
222        command: String,
223        cwd: String,
224        additional_envs: HashMap<String, String>,
225    },
226    CopyFromImage {
227        src_image: NodeId,
228        src_path: String,
229        dst_path: String,
230    },
231    CopyFromLocal {
232        src_path: String,
233        dst_path: String,
234    },
235}
236
237#[derive(Debug, Clone, Serialize, Deserialize)]
238pub struct Output {
239    pub node: NodeId,
240    #[serde(skip)]
241    pub source_literal: Option<Literal>,
242}
243
244/// Given a list of pairs of ground (solved) queries and their proof tree, output
245/// a build graph which builds all the queried images.
246pub fn build_dag_from_proofs(
247    query_and_proofs: &[(Literal, Proof)],
248    rules: &Vec<Clause<IRTerm>>,
249) -> BuildPlan {
250    let mut res = BuildPlan::new();
251    let mut image_literals: HashMap<Literal, NodeId> = HashMap::new();
252
253    /// Takes in a part of the build tree, assuming that it is building an image
254    /// (for example, the tree of an image literal, or a slice of a bigger tree,
255    /// where the slice contains literals that occurs between _operator_copy_begin
256    /// and _operator_copy_end).
257    ///
258    /// This function should return None if the subtree passed in does not build
259    /// an image, for example because there is no call to any intrinsic. It
260    /// should, however, report an error if there is no `from` but there is a
261    /// `run`.
262    fn process_image(
263        subtree: &[&Proof],
264        rules: &Vec<Clause<IRTerm>>,
265        res: &mut BuildPlan,
266        image_literals: &mut HashMap<Literal, NodeId>,
267        tag_with_literal: Option<String>,
268    ) -> Option<NodeId> {
269        let mut curr_state = State {
270            current_node: None,
271            cwd: "".to_string(),
272            current_merge: None,
273            additional_envs: HashMap::new(),
274        };
275
276        /* We go through the proof tree in depth-first order, since this is
277         * effectively what the build instructions are supposed to be ordered in.
278         *
279         * Consider this as an example:
280         * a :- b, c, run(5)
281         * b :- run(1)
282         * c :- run(2), d, run(4)
283         * d :- run(3)
284         *
285         * Remember that repeated call to a literal will result in the expansion
286         * of that literal to be repeated in the tree, so we don't need to worry
287         * about that. A special optimization is made for when we see a literal
288         * before any `from` (last_node == None), in which case we can check if
289         * we already built that literal as another image, and reuse when
290         * possible. In the case where we haven't, we can also store the node we
291         * built from going into that literal into the image_literals store, to
292         * be re-used later.
293         */
294        fn process_tree(
295            proof: &Proof,
296            rules: &Vec<Clause<IRTerm>>,
297            res: &mut BuildPlan,
298            image_literals: &mut HashMap<Literal, NodeId>,
299            curr_state: &mut State,
300        ) {
301            match proof.clause {
302                ClauseId::Query => {}
303                ClauseId::Builtin(ref intrinsic) => {
304                    process_intrinsic(intrinsic, res, image_literals, curr_state);
305                    debug_assert!(proof.children.is_empty()); // Intrinsics should not have children.
306                    return;
307                }
308                ClauseId::Rule(rid) => {
309                    let substituted_lit = rules[rid].head.substitute(&proof.valuation);
310                    debug_assert!(substituted_lit
311                        .args
312                        .iter()
313                        .all(|x| x.is_constant_or_compound_constant()));
314
315                    if !curr_state.has_base() {
316                        // Do the optimization mentioned above.
317                        if let Some(&node_id) = image_literals.get(&substituted_lit) {
318                            curr_state.set_node(node_id);
319                            return; // no need to recurse to children anymore.
320                        } else {
321                            if let Some(node_id) = process_image(
322                                &proof.children.iter().collect::<Vec<_>>()[..],
323                                rules,
324                                res,
325                                image_literals,
326                                Some(substituted_lit.to_string()),
327                            ) {
328                                curr_state.set_node(node_id);
329                                image_literals.insert(substituted_lit, node_id);
330                                return; // no need to recurse to children anymore, since I just built the content of this literal.
331                            } else {
332                                return; // the literal doesn't do any docker thing, so we can safely skip it.
333                            }
334                        }
335                    } else {
336                        // Can't re-use anymore since we already started an image.
337                        // In this case the subtree of this literal shouldn't be
338                        // an image anyway, so just dfs as normal.
339                    }
340                }
341                ClauseId::NegationCheck(_) => {}
342            }
343
344            process_children(
345                &proof.children.iter().collect::<Vec<_>>(),
346                rules,
347                res,
348                image_literals,
349                curr_state,
350            );
351        }
352
353        fn process_intrinsic(
354            intrinsic: &Literal,
355            res: &mut BuildPlan,
356            image_literals: &mut HashMap<Literal, NodeId>,
357            curr_state: &mut State,
358        ) {
359            let name = &intrinsic.predicate.0[..];
360            assert!(!name.starts_with("_operator_")); // operators handled separately below.
361            match name {
362                "from" => {
363                    if curr_state.current_merge.is_some() {
364                        panic!("You can not generate a new image inside a merge.");
365                    }
366                    if curr_state.has_base() {
367                        panic!("from must be the first build instruction.");
368                    }
369                    // Special sharing for the "from" intrinsic.
370                    if let Some(&existing_node) = image_literals.get(&intrinsic) {
371                        curr_state.set_node(existing_node);
372                    } else {
373                        let image_ref = intrinsic.args[0].as_constant().unwrap().to_owned();
374                        let new_node;
375                        if &image_ref == "scratch" {
376                            new_node =
377                                res.new_node(BuildNode::FromScratch { scratch_ref: None }, vec![]);
378                        } else {
379                            new_node = res.new_node(
380                                BuildNode::From {
381                                    display_name: image_ref.clone(),
382                                    image_ref,
383                                },
384                                vec![],
385                            );
386                        }
387                        curr_state.set_node(new_node);
388                        image_literals.insert(intrinsic.clone(), new_node);
389                    }
390                }
391                "run" => {
392                    let command = intrinsic.args[0].as_constant().unwrap().to_owned();
393                    if let Some(ref mut curr_merge) = curr_state.current_merge {
394                        curr_merge.operations.push(MergeOperation::Run {
395                            command,
396                            cwd: curr_state.cwd.clone(),
397                            additional_envs: curr_state.additional_envs.clone(),
398                        });
399                    } else {
400                        if !curr_state.has_base() {
401                            panic!("No base layer yet.");
402                        }
403                        let parent = curr_state.current_node.unwrap();
404                        curr_state.set_node(res.new_node(
405                            BuildNode::Run {
406                                parent: parent,
407                                command: command,
408                                cwd: curr_state.cwd.clone(),
409                                additional_envs: curr_state.additional_envs.clone(),
410                            },
411                            vec![parent],
412                        ));
413                    }
414                }
415                "copy" => {
416                    let src_path = intrinsic.args[0].as_constant().unwrap().to_owned();
417                    if src_path.starts_with("/") {
418                        panic!("The source of a local copy can not be an absolute path.");
419                    }
420                    let dst_path = intrinsic.args[1].as_constant().unwrap();
421                    let dst_path = join_path(&curr_state.cwd, dst_path);
422                    if let Some(ref mut curr_merge) = curr_state.current_merge {
423                        curr_merge
424                            .operations
425                            .push(MergeOperation::CopyFromLocal { src_path, dst_path });
426                    } else {
427                        if !curr_state.has_base() {
428                            panic!("No base layer yet.");
429                        }
430                        let parent = curr_state.current_node.unwrap();
431                        curr_state.set_node(res.new_node(
432                            BuildNode::CopyFromLocal {
433                                parent,
434                                src_path,
435                                dst_path,
436                            },
437                            vec![parent],
438                        ));
439                    }
440                }
441                _ => {
442                    // do nothing - there might be stuff like string_concat.
443                }
444            }
445        }
446
447        fn process_operator(
448            subtree_in_op: &[&Proof],
449            op_name: &str,
450            lit: &Literal, // the "begin" literal of the operator.
451            rules: &Vec<Clause<IRTerm>>,
452            res: &mut BuildPlan,
453            image_literals: &mut HashMap<Literal, NodeId>,
454            curr_state: &mut State,
455        ) {
456            match op_name {
457                // Image-to-image copy. (local copy is not an operator)
458                "copy" => {
459                    let src_image = process_image(subtree_in_op, rules, res, image_literals, None)
460                        .expect("Stuff inside this copy does not build an image.");
461                    let src_path = lit.args[1].as_constant().unwrap().to_owned();
462                    let dst_path = join_path(&curr_state.cwd, lit.args[2].as_constant().unwrap());
463                    if let Some(ref mut curr_merge) = curr_state.current_merge {
464                        curr_merge.operations.push(MergeOperation::CopyFromImage {
465                            src_image,
466                            src_path,
467                            dst_path,
468                        });
469                    } else {
470                        let parent = curr_state.current_node.expect("No base layer yet.");
471                        let node = res.new_node(
472                            BuildNode::CopyFromImage {
473                                parent,
474                                src_image,
475                                src_path,
476                                dst_path,
477                            },
478                            vec![parent, src_image],
479                        );
480                        curr_state.set_node(node);
481                    }
482                }
483                "in_workdir" => {
484                    let new_p = lit.args[1].as_constant().unwrap();
485                    let new_cwd = join_path(&curr_state.cwd, new_p);
486                    curr_state.with_new_cwd(new_cwd, |new_state| {
487                        process_children(subtree_in_op, rules, res, image_literals, new_state);
488                    });
489                    // TODO: emit a warning if the tree inside attempts
490                    // to build a fresh image - this is probably an incorrect usage.
491                }
492                "set_workdir" | "set_entrypoint" | "set_cmd" | "set_env" | "append_path"
493                | "set_label" | "set_user" => {
494                    if curr_state.current_merge.is_some() {
495                        panic!("You can not generate a new image inside a merge.");
496                    }
497                    let img = process_image(subtree_in_op, rules, res, image_literals, None)
498                        .expect(&format!("{} should be applied to an image.", op_name));
499                    if curr_state.has_base() {
500                        panic!(
501                            "{} generates a new image, so it should be the first instruction.",
502                            op_name
503                        );
504                    }
505
506                    match op_name {
507                        "set_workdir" => {
508                            let new_p = lit.args[1].as_constant().unwrap();
509                            curr_state.set_node(res.new_node(
510                                BuildNode::SetWorkdir {
511                                    parent: img,
512                                    new_workdir: join_path(&curr_state.cwd, new_p),
513                                },
514                                vec![img],
515                            ));
516                        }
517                        "set_entrypoint" => {
518                            let arg = &lit.args[1];
519                            let entrypoint = match arg {
520                                IRTerm::Constant(c) => vec![c.to_owned()],
521                                IRTerm::Array(ts) => ts
522                                    .iter()
523                                    .map(|t| t.as_constant().unwrap().to_owned())
524                                    .collect(),
525                                _ => unreachable!(),
526                            };
527                            curr_state.set_node(res.new_node(
528                                BuildNode::SetEntrypoint {
529                                    parent: img,
530                                    new_entrypoint: entrypoint,
531                                },
532                                vec![img],
533                            ));
534                        }
535                        "set_cmd" => {
536                            let arg = &lit.args[1];
537                            let cmd = match arg {
538                                IRTerm::Array(ts) => ts
539                                    .iter()
540                                    .map(|t| t.as_constant().unwrap().to_owned())
541                                    .collect::<Vec<_>>(),
542                                IRTerm::Constant(c) => vec![c.to_owned()],
543                                _ => unreachable!(),
544                            };
545                            curr_state.set_node(res.new_node(
546                                BuildNode::SetCmd {
547                                    parent: img,
548                                    new_cmd: cmd,
549                                },
550                                vec![img],
551                            ));
552                        }
553                        "set_env" => {
554                            let env_k = lit.args[1].as_constant().unwrap().to_owned();
555                            let env_v = lit.args[2].as_constant().unwrap().to_owned();
556                            curr_state.set_node(res.new_node(
557                                BuildNode::SetEnv {
558                                    parent: img,
559                                    key: env_k,
560                                    value: env_v,
561                                },
562                                vec![img],
563                            ));
564                        }
565                        "append_path" => {
566                            let append = format!(":{}", lit.args[1].as_constant().unwrap());
567                            curr_state.set_node(res.new_node(
568                                BuildNode::AppendEnvValue {
569                                    parent: img,
570                                    key: "PATH".to_owned(),
571                                    value: append,
572                                },
573                                vec![img],
574                            ));
575                        }
576                        "set_label" => {
577                            let label_k = lit.args[1].as_constant().unwrap().to_owned();
578                            let label_v = lit.args[2].as_constant().unwrap().to_owned();
579                            curr_state.set_node(res.new_node(
580                                BuildNode::SetLabel {
581                                    parent: img,
582                                    label: label_k,
583                                    value: label_v,
584                                },
585                                vec![img],
586                            ));
587                        }
588                        "set_user" => {
589                            let user = lit.args[1].as_constant().unwrap().to_owned();
590                            curr_state.set_node(
591                                res.new_node(BuildNode::SetUser { parent: img, user }, vec![img]),
592                            );
593                        }
594                        _ => unreachable!(),
595                    }
596                }
597                "merge" => {
598                    if curr_state.current_merge.is_some() {
599                        process_children(subtree_in_op, rules, res, image_literals, curr_state);
600                        return;
601                    }
602                    if !curr_state.has_base() {
603                        panic!("merge requires a base layer outside.");
604                    }
605                    let parent = curr_state.current_node.unwrap();
606                    let merge_node = MergeNode {
607                        parent,
608                        operations: vec![],
609                    };
610                    let merge_node = curr_state.with_new_merge(merge_node, |new_state| {
611                        process_children(subtree_in_op, rules, res, image_literals, new_state);
612                    });
613                    let mut deps: Vec<NodeId> = merge_node
614                        .operations
615                        .iter()
616                        .filter_map(|x| match x {
617                            MergeOperation::CopyFromImage { src_image, .. } => Some(*src_image),
618                            // Explicitly list out all the no-dependency cases to prevent future errors.
619                            MergeOperation::CopyFromLocal { .. } | MergeOperation::Run { .. } => {
620                                None
621                            }
622                        })
623                        .collect();
624                    deps.push(parent);
625                    curr_state.set_node(res.new_node(BuildNode::Merge(merge_node), deps));
626                }
627                "in_env" => {
628                    let env_k = lit.args[1].as_constant().unwrap().to_owned();
629                    let env_v = lit.args[2].as_constant().unwrap().to_owned();
630                    curr_state.with_additional_envs([(env_k, env_v)], |new_state| {
631                        process_children(subtree_in_op, rules, res, image_literals, new_state);
632                    });
633                }
634                _ => {
635                    panic!("Unkown operator: {}", op_name);
636                }
637            }
638        }
639
640        fn process_children(
641            children: &[&Proof],
642            rules: &Vec<Clause<IRTerm>>,
643            res: &mut BuildPlan,
644            image_literals: &mut HashMap<Literal, NodeId>,
645            curr_state: &mut State,
646        ) {
647            let mut i = 0usize;
648            while i < children.len() {
649                let child = children[i];
650                if let ClauseId::Builtin(ref lit) = child.clause {
651                    let name = &lit.predicate.0;
652                    if let Some(op_name) = name
653                        .strip_prefix("_operator_")
654                        .and_then(|s| s.strip_suffix("_begin"))
655                    {
656                        // due to the way things work, the end predicate for this is
657                        // guarenteed to be in the same level.
658                        let end_name = format!("_operator_{}_end", op_name);
659                        let pair_id = lit.args[0].as_constant().unwrap();
660                        let mut j = i + 1;
661                        while !{
662                            if let ClauseId::Builtin(ref lit) = children[j].clause {
663                                lit.predicate.0 == end_name
664                                    && lit.args[0].as_constant() == Some(pair_id)
665                            } else {
666                                false
667                            }
668                        } {
669                            j += 1;
670                        }
671                        // at this point j points to the end predicate.
672
673                        let subtree_in_op = &children[i + 1..j];
674                        process_operator(
675                            subtree_in_op,
676                            op_name,
677                            lit,
678                            rules,
679                            res,
680                            image_literals,
681                            curr_state,
682                        );
683                        i = j + 1;
684                        continue;
685                    }
686                }
687                process_tree(child, rules, res, image_literals, curr_state);
688                i += 1;
689            }
690        }
691
692        process_children(subtree, rules, res, image_literals, &mut curr_state);
693
694        debug_assert!(curr_state.current_merge.is_none());
695
696        if curr_state.current_node.is_some() && tag_with_literal.is_some() {
697            let node = curr_state.current_node.unwrap();
698            let tagged_node = res.new_node(
699                BuildNode::SetLabel {
700                    parent: node,
701                    label: MODUS_LABEL.to_owned(),
702                    value: tag_with_literal.unwrap().to_owned(),
703                },
704                vec![node],
705            );
706            curr_state.set_node(tagged_node);
707        }
708        curr_state.current_node
709    }
710
711    for (query, proof) in query_and_proofs.into_iter() {
712        debug_assert!(query
713            .args
714            .iter()
715            .all(|x| x.is_constant_or_compound_constant()));
716        if let Some(&existing_node_id) = image_literals.get(&query) {
717            // TODO: unreachable?
718            res.outputs.push(Output {
719                node: existing_node_id,
720                source_literal: Some(query.clone()),
721            });
722            continue;
723        }
724        if let Some(node_id) = process_image(
725            &[proof],
726            rules,
727            &mut res,
728            &mut image_literals,
729            Some(query.to_string()),
730        ) {
731            image_literals.insert(query.clone(), node_id);
732            res.outputs.push(Output {
733                node: node_id,
734                source_literal: Some(query.clone()),
735            });
736        } else {
737            panic!("{} does not resolve to any docker instructions.", query);
738        }
739    }
740
741    res
742}
743
744fn join_path(base: &str, path: &str) -> String {
745    match Path::new(base).join(path).to_str() {
746        Some(s) => s.to_owned(),
747        None => panic!("Path containing invalid utf-8 are not allowed."),
748    }
749}
750
751pub fn plan_from_modusfile(
752    mf: Modusfile,
753    query: modusfile::Expression,
754) -> Result<BuildPlan, Vec<Diagnostic<()>>> {
755    // 1. Adds a new clause based on the user's expression query to the Modusfile, `_query :- ...`.
756    // 2. Translates the Modusfile to IR.
757    // 3. Find proof for `_query`. We need to do this, and not just find proof of the image literal due to any
758    //    possible logical constraints.
759    // 4. Modify proof to give proof for the single image literal. The other literals, if any, should
760    //    only be logic literals.
761    //
762    // Operators on image literals will not work.
763
764    fn validate_query_expression(query: &modusfile::Expression) -> Result<(), Vec<Diagnostic<()>>> {
765        // ensures that no operators were used
766        match query {
767            modusfile::Expression::Literal(_) => Ok(()),
768            modusfile::Expression::OperatorApplication(_, _, _) => {
769                Err(vec![Diagnostic::error().with_message(
770                    "Operators in queries are currently unsupported.",
771                )])
772            }
773            // There shouldn't be any issue with negation in queries.
774            modusfile::Expression::And(_, _, e1, e2) | modusfile::Expression::Or(_, _, e1, e2) => {
775                validate_query_expression(e1)?;
776                validate_query_expression(e2)
777            }
778        }
779    }
780
781    fn get_image_literal(
782        query: &modusfile::Expression,
783        mf_with_query: &Modusfile,
784        ir_q_clause: &Clause,
785    ) -> Result<Literal<IRTerm>, Vec<Diagnostic<()>>> {
786        let mut errs = Vec::new();
787
788        if let Err(mut es) = validate_query_expression(query) {
789            errs.append(&mut es);
790        }
791
792        let query_lits = query.literals();
793
794        let kind_res = mf_with_query.kinds();
795
796        let image_count = query_lits
797            .iter()
798            .filter(|query_lit| kind_res.pred_kind.get(&query_lit.predicate) == Some(&Kind::Image))
799            .count();
800        if image_count != 1 {
801            errs.push(Diagnostic::error().with_message(format!("There must be exactly one image predicate in the query, but {image_count} were found.")));
802        }
803
804        let layer_count = query_lits
805            .iter()
806            .filter(|query_lit| kind_res.pred_kind.get(&query_lit.predicate) == Some(&Kind::Layer))
807            .count();
808        if layer_count > 0 {
809            errs.push(Diagnostic::error().with_message(format!(
810                "Layer predicates in queries are currently unsupported, but we found {layer_count}"
811            )));
812        }
813
814        if !errs.is_empty() {
815            return Err(errs);
816        }
817
818        let expression_image_literal = query_lits
819            .iter()
820            .find(|lit| kind_res.pred_kind.get(&lit.predicate) == Some(&Kind::Image))
821            .unwrap();
822        let image_literal = ir_q_clause
823            .body
824            .iter()
825            .find(|lit| lit.predicate == expression_image_literal.predicate)
826            .expect("should find matching predicate name after translation");
827        Ok(image_literal.clone())
828    }
829
830    let max_depth = 175;
831
832    let goal_pred = Predicate("_query".to_owned());
833    let user_clause = modusfile::ModusClause {
834        head: Literal {
835            positive: true,
836            position: None,
837            predicate: goal_pred.clone(),
838            args: Vec::new(),
839        },
840        body: Some(query.clone()),
841    };
842
843    let mf_with_query = Modusfile(mf.0.into_iter().chain(iter::once(user_clause)).collect());
844    let ir_clauses: Vec<Clause> = translate_modusfile(&mf_with_query);
845
846    let q_clause = ir_clauses
847        .iter()
848        .find(|c| c.head.predicate == goal_pred)
849        .expect("should find same predicate name after translation");
850    let query_goal = &q_clause.body;
851
852    let image_literal = get_image_literal(&query, &mf_with_query, q_clause)?;
853
854    // don't store full tree as this takes a lot of memory, and is probably not needed
855    // when building/transpiling
856    let success_tree = Result::from(sld::sld(&ir_clauses, &query_goal, max_depth, false))?;
857    let proofs = sld::proofs(&success_tree, &ir_clauses, &query_goal);
858
859    let query_and_proofs = proofs
860        .into_iter()
861        .map(|(_, p)| (image_literal.substitute(&p.valuation), p))
862        .collect::<Vec<_>>();
863    Ok(build_dag_from_proofs(&query_and_proofs[..], &ir_clauses))
864}