nickel_lang_core/cache.rs
//! Various caches for artifacts generated across the whole pipeline: source code, parsed
//! representations, import data (dependencies, reverse dependencies, etc.).
//!
//! In order to manage the complexity of correctly borrowing such structures, where the arena
//! allocation of ASTs requires the use of self-borrowing structures, the main cache is split
//! into different subcaches that can be borrowed independently.
pub use ast_cache::AstCache;

use crate::{
    bytecode::ast::{
        self,
        compat::{ToAst, ToMainline},
        Ast, AstAlloc, TryConvert,
    },
    closurize::Closurize as _,
    error::{Error, ImportError, ParseError, ParseErrors, TypecheckError},
    eval::cache::Cache as EvalCache,
    eval::Closure,
    files::{FileId, Files},
    identifier::LocIdent,
    metrics::measure_runtime,
    package::PackageMap,
    parser::{lexer::Lexer, ErrorTolerantParser, ExtendedTerm},
    position::TermPos,
    program::FieldPath,
    stdlib::{self as nickel_stdlib, StdlibModule},
    term::{self, RichTerm, Term},
    transform::{import_resolution, Wildcards},
    traverse::{Traverse, TraverseOrder},
    typ::{self as mainline_typ, UnboundTypeVariableError},
    typecheck::{self, typecheck, HasApparentType, TypecheckMode},
    {eval, parser, transform},
};

#[cfg(feature = "nix-experimental")]
use crate::nix_ffi;

use std::{
    collections::{hash_map, HashMap, HashSet},
    ffi::{OsStr, OsString},
    fmt, fs,
    io::{self, Read},
    path::{Path, PathBuf},
    result::Result,
    sync::Arc,
    time::SystemTime,
};

use ouroboros::self_referencing;

/// Error when trying to add bindings to the typing context where the given term isn't a record
/// literal.
pub struct NotARecord;

/// Supported input formats.
#[derive(Default, Clone, Copy, Eq, Debug, PartialEq, Hash)]
pub enum InputFormat {
    #[default]
    Nickel,
    Json,
    Yaml,
    Toml,
    #[cfg(feature = "nix-experimental")]
    Nix,
    Text,
}

impl InputFormat {
    /// Returns an [InputFormat] based on the file extension of a path.
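    ///
    /// For instance (a minimal sketch; the paths are hypothetical):
    ///
    /// ```rust,ignore
    /// assert_eq!(InputFormat::from_path("config.ncl"), Some(InputFormat::Nickel));
    /// assert_eq!(InputFormat::from_path("values.yml"), Some(InputFormat::Yaml));
    /// // Unknown or missing extensions yield `None`.
    /// assert_eq!(InputFormat::from_path("README"), None);
    /// ```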
    pub fn from_path(path: impl AsRef<Path>) -> Option<InputFormat> {
        match path.as_ref().extension().and_then(OsStr::to_str) {
            Some("ncl") => Some(InputFormat::Nickel),
            Some("json") => Some(InputFormat::Json),
            Some("yaml") | Some("yml") => Some(InputFormat::Yaml),
            Some("toml") => Some(InputFormat::Toml),
            #[cfg(feature = "nix-experimental")]
            Some("nix") => Some(InputFormat::Nix),
            Some("txt") => Some(InputFormat::Text),
            _ => None,
        }
    }

    pub fn to_str(&self) -> &'static str {
        match self {
            InputFormat::Nickel => "Nickel",
            InputFormat::Json => "Json",
            InputFormat::Yaml => "Yaml",
            InputFormat::Toml => "Toml",
            InputFormat::Text => "Text",
            #[cfg(feature = "nix-experimental")]
            InputFormat::Nix => "Nix",
        }
    }

    /// Extracts the format embedded in a [SourcePath].
    pub fn from_source_path(source_path: &SourcePath) -> Option<InputFormat> {
        if let SourcePath::Path(_p, fmt) = source_path {
            Some(*fmt)
        } else {
            None
        }
    }
}

impl fmt::Display for InputFormat {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "{}", self.to_str())
    }
}

impl std::str::FromStr for InputFormat {
    type Err = ();

    fn from_str(s: &str) -> Result<Self, Self::Err> {
        Ok(match s {
            "Json" => InputFormat::Json,
            "Nickel" => InputFormat::Nickel,
            "Text" => InputFormat::Text,
            "Yaml" => InputFormat::Yaml,
            "Toml" => InputFormat::Toml,
            #[cfg(feature = "nix-experimental")]
            "Nix" => InputFormat::Nix,
            _ => return Err(()),
        })
    }
}

/// The term cache stores the parsed values (the runtime representation) of sources.
#[derive(Debug, Clone)]
pub struct TermCache {
    /// The term table stores parsed terms corresponding to the entries of the file database.
    terms: HashMap<FileId, TermEntry>,
}

#[derive(Debug, Clone, Copy, Eq, PartialEq)]
pub struct TermNotFound;

impl TermCache {
    pub fn new() -> Self {
        TermCache {
            terms: HashMap::new(),
        }
    }

    /// Updates the state of an entry and returns the previous state, or an error if the entry
    /// isn't in the cache.
    pub fn update_state(
        &mut self,
        file_id: FileId,
        new: TermEntryState,
    ) -> Result<TermEntryState, TermNotFound> {
        self.terms
            .get_mut(&file_id)
            .map(|TermEntry { state, .. }| std::mem::replace(state, new))
            .ok_or(TermNotFound)
    }

    /// Applies term transformations, except for import resolution, which is implemented in a
    /// separate phase.
    fn transform(
        &mut self,
        wildcards: &WildcardsCache,
        import_data: &ImportData,
        file_id: FileId,
    ) -> Result<CacheOp<()>, TermCacheError<UnboundTypeVariableError>> {
        match self.terms.get(&file_id).map(|entry| entry.state) {
            Some(state) if state >= TermEntryState::Transformed => Ok(CacheOp::Cached(())),
            Some(state) => {
                if state < TermEntryState::Transforming {
                    let cached_term = self.terms.remove(&file_id).unwrap();
                    let term =
                        transform::transform(cached_term.term, wildcards.wildcards.get(&file_id))?;
                    self.insert(
                        file_id,
                        TermEntry {
                            term,
                            state: TermEntryState::Transforming,
                            ..cached_term
                        },
                    );

                    let imported: Vec<_> = import_data.imports(file_id).collect();
                    for file_id in imported {
                        self.transform(wildcards, import_data, file_id)?;
                    }

                    // unwrap(): we re-inserted the entry after removal and transformation, so it
                    // should be in the cache.
                    let _ = self
                        .update_state(file_id, TermEntryState::Transformed)
                        .unwrap();
                }

                Ok(CacheOp::Done(()))
            }
            None => Err(CacheError::IncompatibleState {
                want: TermEntryState::Populated,
            }),
        }
    }

    /// Retrieves the state of an entry. Returns `None` if the entry is not in the term cache.
    /// This might happen if the file hasn't been parsed, or if the term cache hasn't been
    /// filled from the AST cache yet. The latter is supposed to happen right before program
    /// transformations.
    pub fn entry_state(&self, file_id: FileId) -> Option<TermEntryState> {
        self.terms
            .get(&file_id)
            .map(|TermEntry { state, .. }| *state)
    }

    /// Replaces a cache entry with a closurized version of itself. If it contains imports,
    /// closurizes them recursively.
    ///
    /// Closurization is not required before evaluation, but it has two benefits:
    ///
    /// - the closurized term uses the evaluation cache, so if it is imported in multiple
    ///   places then they will share a cache
    /// - the eval cache's built-in mechanism for preventing infinite recursion will also
    ///   apply to recursive imports.
    ///
    /// The main disadvantage of closurization is that it makes the resulting runtime
    /// representation less useful. You wouldn't want to closurize before pretty-printing, for
    /// example. This isn't as important these days, since we also have the AST representation
    /// at hand.
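    ///
    /// A usage sketch (assuming an eval cache such as
    /// `nickel_lang_core::eval::cache::CacheImpl`; variable names are hypothetical):
    ///
    /// ```rust,ignore
    /// let mut eval_cache = CacheImpl::new();
    /// // `terms` and `import_data` come from a populated `CacheHub`.
    /// terms.closurize(&mut eval_cache, &import_data, file_id)?;
    /// ```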
    pub fn closurize<C: EvalCache>(
        &mut self,
        cache: &mut C,
        import_data: &ImportData,
        file_id: FileId,
    ) -> Result<CacheOp<()>, TermCacheError<()>> {
        match self.entry_state(file_id) {
            Some(state) if state >= TermEntryState::Closurized => Ok(CacheOp::Cached(())),
            Some(_) => {
                let cached_term = self.terms.remove(&file_id).unwrap();
                let term = cached_term.term.closurize(cache, eval::Environment::new());
                self.insert(
                    file_id,
                    TermEntry {
                        term,
                        state: TermEntryState::Closurized,
                        ..cached_term
                    },
                );

                let imported: Vec<_> = import_data.imports(file_id).collect();
                for file_id in imported {
                    self.closurize(cache, import_data, file_id)?;
                }

                Ok(CacheOp::Done(()))
            }
            None => Err(CacheError::IncompatibleState {
                want: TermEntryState::Populated,
            }),
        }
    }

    /// Returns an immutable reference to the whole term cache.
    pub fn terms(&self) -> &HashMap<FileId, TermEntry> {
        &self.terms
    }

    /// Retrieves a fresh clone of a cached term.
    pub fn get_owned(&self, file_id: FileId) -> Option<RichTerm> {
        self.terms
            .get(&file_id)
            .map(|TermEntry { term, .. }| term.clone())
    }

    /// Retrieves a reference to a cached term.
    pub fn get(&self, file_id: FileId) -> Option<&RichTerm> {
        self.terms.get(&file_id).map(|TermEntry { term, .. }| term)
    }

    /// Retrieves the whole entry for a given file id.
    pub fn get_entry(&self, file_id: FileId) -> Option<&TermEntry> {
        self.terms.get(&file_id)
    }

    /// Returns `true` if the term cache contains a term for the given file id.
    pub fn contains(&self, file_id: FileId) -> bool {
        self.terms.contains_key(&file_id)
    }

    /// Inserts a new entry in the cache. Usually, this should be handled by [CacheHub]
    /// directly, but there are some use-cases where it is useful to pre-fill the term cache
    /// (typically in NLS).
    pub fn insert(&mut self, file_id: FileId, entry: TermEntry) {
        self.terms.insert(file_id, entry);
    }
}

/// This is a temporary fix for [#2362](https://github.com/tweag/nickel/issues/2362). File paths
/// prefixed with this are treated specially: they can refer to in-memory sources. To build an
/// import expression that refers to an in-memory source, append the source name to this prefix
/// and use it as the path: `format!("{IN_MEMORY_SOURCE_PATH_PREFIX}{src_name}")`.
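///
/// For example (a sketch; `my_snippet` is a hypothetical in-memory source name):
///
/// ```rust,ignore
/// // Builds a path that resolves to the in-memory source registered as "my_snippet".
/// let path = format!("{IN_MEMORY_SOURCE_PATH_PREFIX}my_snippet");
/// ```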
pub const IN_MEMORY_SOURCE_PATH_PREFIX: &str = "%inmem_src%:";

/// The source cache handles reading textual data from the file system or other sources and
/// storing it in a [Files] instance.
///
/// While not ideal, we have to make most of the fields public to allow the LSP to perform its
/// own import resolution.
#[derive(Clone)]
pub struct SourceCache {
    /// The content of the program sources plus imports.
    pub files: Files,
    /// Reverse map from file ids to source paths.
    pub file_paths: HashMap<FileId, SourcePath>,
    /// The name-id table, holding file ids stored in the database indexed by source names.
    pub file_ids: HashMap<SourcePath, NameIdEntry>,
    /// Paths to search for imports, as specified by the user through either the CLI argument
    /// `--import-path` or the environment variable `$NICKEL_IMPORT_PATH`.
    pub import_paths: Vec<PathBuf>,
    /// A table mapping FileIds to the package that they belong to.
    ///
    /// Path dependencies have already been canonicalized to absolute paths.
    pub packages: HashMap<FileId, PathBuf>,
    /// The map used to resolve package imports.
    pub package_map: Option<PackageMap>,
}

impl SourceCache {
    pub fn new() -> Self {
        SourceCache {
            files: Files::new(),
            file_paths: HashMap::new(),
            file_ids: HashMap::new(),
            import_paths: Vec::new(),
            packages: HashMap::new(),
            package_map: None,
        }
    }

    /// Retrieves the name of a source given an id.
    pub fn name(&self, file_id: FileId) -> &OsStr {
        self.files.name(file_id)
    }

    /// Adds paths to the import path list, where the resolver looks for imported files.
    pub fn add_import_paths<P>(&mut self, paths: impl Iterator<Item = P>)
    where
        PathBuf: From<P>,
    {
        self.import_paths.extend(paths.map(PathBuf::from));
    }

    /// Sets the package map to use for package import resolution.
    pub fn set_package_map(&mut self, map: PackageMap) {
        self.package_map = Some(map);
    }

    /// Same as [Self::add_file], but assumes that the path is already normalized and takes the
    /// timestamp as a parameter.
    fn add_normalized_file(
        &mut self,
        path: PathBuf,
        format: InputFormat,
        timestamp: SystemTime,
    ) -> io::Result<FileId> {
        let contents = std::fs::read_to_string(&path)?;
        let file_id = self.files.add(&path, contents);

        self.file_paths
            .insert(file_id, SourcePath::Path(path.clone(), format));
        self.file_ids.insert(
            SourcePath::Path(path, format),
            NameIdEntry {
                id: file_id,
                source: SourceKind::Filesystem(timestamp),
            },
        );
        Ok(file_id)
    }

    /// Loads a file and adds it to the name-id table.
    ///
    /// Uses the normalized path and the *modified at* timestamp as the name-id table entry.
    /// Overrides any existing entry with the same name.
    pub fn add_file(
        &mut self,
        path: impl Into<OsString>,
        format: InputFormat,
    ) -> io::Result<FileId> {
        let path = path.into();
        let timestamp = timestamp(&path)?;
        let normalized = normalize_path(&path)?;
        self.add_normalized_file(normalized, format, timestamp)
    }

    /// Tries to retrieve the id of a file from the cache.
    ///
    /// If it was not in the cache, tries to read it and add it as a new entry.
    ///
    /// # In-memory sources
    ///
    /// As a temporary fix for [#2362](https://github.com/tweag/nickel/issues/2362), if a file
    /// path starts with [IN_MEMORY_SOURCE_PATH_PREFIX], the suffix is first looked up as an
    /// un-normalized value, which makes it possible to hit in-memory-only sources by importing
    /// a path `"{IN_MEMORY_SOURCE_PATH_PREFIX}{src_name}"`. If it can't be found, it is looked
    /// up normally, so that this scheme doesn't break strange file names that happen to contain
    /// the source path prefix.
    ///
    /// It is theoretically possible that if both the source "abc" and the file
    /// "{IN_MEMORY_SOURCE_PATH_PREFIX}abc" exist, the source is imported instead of the
    /// intended file. However, given the prefix, such a clash just can't be accidental. As we
    /// want to give access to in-memory sources in any case, although this can be surprising, I
    /// don't see any obvious attack scenario here. This fix is also intended to be temporary.
    /// If you still need to make sure this doesn't happen, one way would be to add some
    /// randomness to the name of the sources, so that they can't be predicted beforehand.
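    ///
    /// A usage sketch (assuming `sources` is a `SourceCache`; the path is hypothetical):
    ///
    /// ```rust,ignore
    /// // Returns `CacheOp::Cached(id)` if `config.ncl` was already loaded and is up to date,
    /// // and `CacheOp::Done(id)` if it was (re)read from the filesystem.
    /// let op = sources.get_or_add_file("config.ncl", InputFormat::Nickel)?;
    /// let file_id = op.inner();
    /// ```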
    pub fn get_or_add_file(
        &mut self,
        path: impl Into<OsString>,
        format: InputFormat,
    ) -> io::Result<CacheOp<FileId>> {
        let path = path.into();
        let normalized = normalize_path(&path)?;

        // Try to fetch a generated source if the path starts with the hardcoded prefix.
        let generated_entry = path
            .to_str()
            .and_then(|p| p.strip_prefix(IN_MEMORY_SOURCE_PATH_PREFIX))
            .and_then(|src_name| {
                self.file_ids
                    .get(&SourcePath::Path(src_name.into(), format))
            });

        if let Some(entry) = generated_entry {
            return Ok(CacheOp::Cached(entry.id));
        }

        match self.id_or_new_timestamp_of(normalized.as_ref(), format)? {
            SourceState::UpToDate(id) => Ok(CacheOp::Cached(id)),
            SourceState::Stale(timestamp) => self
                .add_normalized_file(normalized, format, timestamp)
                .map(CacheOp::Done),
        }
    }

    /// Loads a source and adds it to the name-id table.
    ///
    /// Does not check if a source with the same name already exists: if it is the case,
    /// [Self::add_source] will happily override the old entry in the name-id table.
    pub fn add_source<T>(&mut self, source_name: SourcePath, mut source: T) -> io::Result<FileId>
    where
        T: Read,
    {
        let mut buffer = String::new();
        source.read_to_string(&mut buffer)?;
        Ok(self.add_string(source_name, buffer))
    }

    /// Returns the content of a file.
    ///
    /// Panics if the file id is invalid.
    pub fn source(&self, id: FileId) -> &str {
        self.files.source(id)
    }

    /// Returns a cloned `Arc` to the content of the file.
    ///
    /// The `Arc` is here for the LSP, where the background evaluation is handled by background
    /// threads and processes.
    ///
    /// Panics if the file id is invalid.
    pub fn clone_source(&self, id: FileId) -> Arc<str> {
        self.files.clone_source(id)
    }

    /// Loads a new source as a string and adds it to the name-id table.
    ///
    /// Does not check if a source with the same name already exists: if it is the case, this
    /// one will override the old entry in the name-id table but the old `FileId` will remain
    /// valid.
    pub fn add_string(&mut self, source_name: SourcePath, s: String) -> FileId {
        let id = self.files.add(source_name.clone(), s);

        self.file_paths.insert(id, source_name.clone());
        self.file_ids.insert(
            source_name,
            NameIdEntry {
                id,
                source: SourceKind::Memory,
            },
        );
        id
    }

    /// Loads a new source as a string, replacing any existing source with the same name.
    ///
    /// As opposed to [CacheHub::replace_string], this method doesn't update the other caches.
    /// It just affects the source cache.
    pub fn replace_string(&mut self, source_name: SourcePath, s: String) -> FileId {
        if let Some(file_id) = self.id_of(&source_name) {
            // The file may have been originally loaded from the filesystem and then
            // updated by the LSP, so the SourceKind needs to be updated to Memory.
            self.file_ids.insert(
                source_name,
                NameIdEntry {
                    id: file_id,
                    source: SourceKind::Memory,
                },
            );
            self.files.update(file_id, s);
            file_id
        } else {
            // We re-use [Self::add_string] here to properly fill the file_paths and file_ids
            // tables.
            self.add_string(source_name, s)
        }
    }

    /// Closes a file that has been opened in memory and reloads it from the filesystem.
    /// Returns the file ID of the replacement file loaded from the filesystem.
    pub fn close_in_memory_file(
        &mut self,
        path: PathBuf,
        format: InputFormat,
    ) -> Result<FileCloseResult, FileCloseError> {
        let entry = self
            .file_ids
            .get_mut(&SourcePath::Path(path.clone(), format))
            .ok_or(FileCloseError::FileIdNotFound)?;
        match &entry.source {
            SourceKind::Memory => {
                let closed_id = entry.id;
                entry.source = SourceKind::MemoryClosed;
                let replacement_id = self.get_or_add_file(path, format).map(|op| op.inner());
                Ok(FileCloseResult {
                    closed_id,
                    replacement_id,
                })
            }
            _ => Err(FileCloseError::FileNotOpen),
        }
    }

    /// Retrieves the id of a source given a name.
    ///
    /// Note that files added via [Self::add_file] are indexed by their full normalized path
    /// (cf. [normalize_path]).
    pub fn id_of(&self, name: &SourcePath) -> Option<FileId> {
        match name {
            SourcePath::Path(p, fmt) => match self.id_or_new_timestamp_of(p, *fmt).ok()? {
                SourceState::UpToDate(id) => Some(id),
                SourceState::Stale(_) => None,
            },
            name => Some(self.file_ids.get(name)?.id),
        }
    }

    /// Tries to retrieve the id of a cached source.
    ///
    /// Only returns `Ok` if the source is up-to-date; if the source is stale, returns
    /// either the new timestamp of the up-to-date file or the error we encountered when
    /// trying to read it (which most likely means there was no such file).
    ///
    /// The main point of this awkward signature is to minimize I/O operations: if we accessed
    /// the timestamp, keep it around.
    fn id_or_new_timestamp_of(&self, name: &Path, format: InputFormat) -> io::Result<SourceState> {
        match self
            .file_ids
            .get(&SourcePath::Path(name.to_owned(), format))
        {
            None
            | Some(NameIdEntry {
                source: SourceKind::MemoryClosed,
                ..
            }) => Ok(SourceState::Stale(timestamp(name)?)),
            Some(NameIdEntry {
                id,
                source: SourceKind::Filesystem(ts),
            }) => {
                let new_timestamp = timestamp(name)?;
                if ts == &new_timestamp {
                    Ok(SourceState::UpToDate(*id))
                } else {
                    Ok(SourceState::Stale(new_timestamp))
                }
            }
            Some(NameIdEntry {
                id,
                source: SourceKind::Memory,
            }) => Ok(SourceState::UpToDate(*id)),
        }
    }

    /// Gets a reference to the underlying files. Required by the WASM REPL error reporting code
    /// and LSP functions.
    pub fn files(&self) -> &Files {
        &self.files
    }

    /// Parses a Nickel source without querying or populating the other caches.
    pub fn parse_nickel<'ast>(
        &self,
        // We take the allocator explicitly, to make sure `self.asts` is properly initialized
        // before calling this function, and won't be dropped.
        alloc: &'ast AstAlloc,
        file_id: FileId,
    ) -> Result<Ast<'ast>, ParseErrors> {
        parse_nickel(alloc, file_id, self.files.source(file_id))
    }

    /// Parses a source that isn't Nickel code without querying or populating the other caches.
    /// Supports multiple formats.
    ///
    /// The Nickel/non-Nickel distinction is a bit artificial at the moment, due to the fact
    /// that parsing Nickel returns the new [crate::bytecode::ast::Ast], while parsing other
    /// formats doesn't go through the new AST first but directly deserializes to the legacy
    /// [crate::term::Term] for simplicity and performance reasons.
    ///
    /// Once RFC007 is fully implemented, we might clean this up.
    ///
    /// # Panics
    ///
    /// This function panics if `format` is [InputFormat::Nickel].
    pub fn parse_other(
        &self,
        file_id: FileId,
        format: InputFormat,
    ) -> Result<RichTerm, ParseError> {
        let attach_pos = |t: RichTerm| -> RichTerm {
            let pos: TermPos = self.files.source_span(file_id).into();
            t.with_pos(pos)
        };

        let source = self.files.source(file_id);

        match format {
            InputFormat::Nickel => {
                // Panicking isn't great, but we expect this to be temporary, until RFC007 is
                // fully implemented. And this case is an internal bug.
                panic!("error: trying to parse a Nickel source with parse_other")
            }
            InputFormat::Json => serde_json::from_str(source)
                .map(attach_pos)
                .map_err(|err| ParseError::from_serde_json(err, file_id, &self.files)),
            InputFormat::Yaml => crate::serialize::yaml::load_yaml_term(source, Some(file_id)),
            InputFormat::Toml => crate::serialize::toml_deser::from_str(source, file_id)
                .map(attach_pos)
                .map_err(|err| ParseError::from_toml(err, file_id)),
            #[cfg(feature = "nix-experimental")]
            InputFormat::Nix => {
                let json = nix_ffi::eval_to_json(source, &self.get_base_dir_for_nix(file_id))
                    .map_err(|e| ParseError::from_nix(e.what(), file_id))?;
                serde_json::from_str(&json)
                    .map(attach_pos)
                    .map_err(|err| ParseError::from_serde_json(err, file_id, &self.files))
            }
            InputFormat::Text => Ok(attach_pos(Term::Str(source.into()).into())),
        }
    }

    /// Returns true if a particular file id represents a Nickel standard library file, false
    /// otherwise.
    pub fn is_stdlib_module(&self, file: FileId) -> bool {
        self.files.is_stdlib(file)
    }

    /// Retrieves the file id for a given standard library module.
    pub fn get_submodule_file_id(&self, module: StdlibModule) -> Option<FileId> {
        self.stdlib_modules()
            .find(|(m, _id)| m == &module)
            .map(|(_, id)| id)
    }

    /// Returns the list of file ids corresponding to the standard library modules.
    pub fn stdlib_modules(&self) -> impl Iterator<Item = (StdlibModule, FileId)> {
        self.files.stdlib_modules()
    }

    /// Returns the format of a given source. Returns `None` if there is no entry in the source
    /// cache for `file_id`, or if there is no well-defined input format (e.g. for REPL inputs,
    /// field assignments, etc.).
    pub fn input_format(&self, file_id: FileId) -> Option<InputFormat> {
        self.file_paths
            .get(&file_id)
            .and_then(|source| match source {
                SourcePath::Path(_, input_format) => Some(*input_format),
                SourcePath::Std(_) => Some(InputFormat::Nickel),
                SourcePath::Snippet(_)
                | SourcePath::Query
                | SourcePath::ReplInput(_)
                | SourcePath::ReplTypecheck
                | SourcePath::ReplQuery
                | SourcePath::CliFieldAssignment
                | SourcePath::Override(_)
                | SourcePath::Generated(_) => None,
            })
    }

    /// Returns the base path for Nix evaluation, which is the parent directory of the source
    /// file if any, or the current working directory, or an empty path if we couldn't find
    /// anything better.
    #[cfg(feature = "nix-experimental")]
    fn get_base_dir_for_nix(&self, file_id: FileId) -> PathBuf {
        let parent_dir = self
            .file_paths
            .get(&file_id)
            .and_then(|source_path| Path::new(<&OsStr>::try_from(source_path).ok()?).parent());

        parent_dir
            .map(PathBuf::from)
            .or_else(|| std::env::current_dir().ok())
            .unwrap_or_default()
    }
}

/// Stores the mapping of each wildcard id to its inferred type, for each file in the cache.
#[derive(Default, Clone, Debug)]
pub struct WildcardsCache {
    wildcards: HashMap<FileId, Wildcards>,
}

impl WildcardsCache {
    pub fn new() -> Self {
        Self::default()
    }

    pub fn get(&self, file_id: FileId) -> Option<&Wildcards> {
        self.wildcards.get(&file_id)
    }
}

/// Metadata about an imported file.
#[derive(Clone, Copy, Debug, Hash, Eq, PartialEq)]
pub struct ImportTarget {
    pub file_id: FileId,
    pub format: InputFormat,
}

/// Stores dependency and reverse dependency data between sources.
#[derive(Default, Clone)]
pub struct ImportData {
    /// A map containing, for each FileId, the list of files it imports (directly).
    pub imports: HashMap<FileId, HashSet<ImportTarget>>,
    /// A map containing, for each FileId, the list of files importing it (directly). Note that
    /// we don't need to store the format here, as only Nickel files can import other files. We
    /// do however store the position of the first import expression (the same file can be
    /// imported many times from a given file), for error reporting purposes.
    pub rev_imports: HashMap<FileId, HashMap<FileId, TermPos>>,
}

impl ImportData {
    pub fn new() -> Self {
        Self::default()
    }

    /// Returns the set of files that this file imports.
    pub fn imports(&self, file: FileId) -> impl Iterator<Item = FileId> + '_ {
        self.imports
            .get(&file)
            .into_iter()
            .flat_map(|s| s.iter())
            .map(|tgt| tgt.file_id)
    }

    /// Returns the set of files that import this file.
    pub fn rev_imports(&self, file: FileId) -> impl Iterator<Item = FileId> + '_ {
        self.rev_imports
            .get(&file)
            .into_iter()
            .flat_map(|h| h.keys())
            .copied()
    }

    /// Returns the set of files that transitively depend on this file.
    pub fn transitive_rev_imports(&self, file: FileId) -> HashSet<FileId> {
        let mut ret = HashSet::new();
        let mut stack = vec![file];

        while let Some(file) = stack.pop() {
            for f in self.rev_imports(file) {
                if ret.insert(f) {
                    stack.push(f);
                }
            }
        }

        ret
    }

    /// Returns the set of files that this file transitively depends on.
    pub fn transitive_imports(&self, file: FileId) -> HashSet<FileId> {
        let mut ret = HashSet::new();
        let mut stack = vec![file];

        while let Some(file) = stack.pop() {
            for f in self.imports(file) {
                if ret.insert(f) {
                    stack.push(f);
                }
            }
        }

        ret
    }

    /// Returns `true` if these import data are empty.
    pub fn is_empty(&self) -> bool {
        self.imports.is_empty() && self.rev_imports.is_empty()
    }
}

/// The cache hub aggregates the various kinds of source-related caches used by Nickel.
///
/// [CacheHub] handles parsing, typechecking and program transformation of sources, as well as
/// caching the corresponding artifacts (text, ASTs, state). This is the central entry point for
/// other modules.
///
/// # RFC007
///
/// As part of the migration to a new AST required by RFC007, as long as we don't have a fully
/// working bytecode virtual machine, the cache needs to keep parsed expressions both in the old
/// representation (dubbed "mainline" or the runtime representation in many places) and in the
/// new AST representation.
pub struct CacheHub {
    pub terms: TermCache,
    pub sources: SourceCache,
    pub asts: AstCache,
    pub wildcards: WildcardsCache,
    pub import_data: ImportData,
    #[cfg(debug_assertions)]
    /// Skip loading the stdlib, used for debugging purposes
    pub skip_stdlib: bool,
}

impl CacheHub {
    pub fn new() -> Self {
        CacheHub {
            terms: TermCache::new(),
            sources: SourceCache::new(),
            asts: AstCache::empty(),
            wildcards: WildcardsCache::new(),
            import_data: ImportData::new(),
            #[cfg(debug_assertions)]
            skip_stdlib: false,
        }
    }

    /// Actual implementation of [Self::parse_to_ast] which doesn't take `self` as a parameter,
    /// so that it can be reused from other places when we don't have a full [CacheHub] instance
    /// at hand.
    fn parse_ast_impl(
        asts: &mut AstCache,
        sources: &mut SourceCache,
        file_id: FileId,
    ) -> Result<CacheOp<()>, ParseErrors> {
        if asts.contains(file_id) {
            Ok(CacheOp::Cached(()))
        } else {
            let _ = asts.parse_nickel(file_id, sources.files.source(file_id))?;
            Ok(CacheOp::Done(()))
        }
    }

    /// Parses a REPL input and populates the corresponding entry in the cache.
    ///
    /// The inner value in the `Ok` case is the identifier of the toplevel let, if the input is
    /// a toplevel let, or `None` if the input is a standard Nickel expression.
    ///
    /// # RFC007
    ///
    /// This method populates both the AST cache and the term cache at once.
    pub fn parse_repl(
        &mut self,
        file_id: FileId,
    ) -> Result<CacheOp<Option<LocIdent>>, ParseErrors> {
        // Since we need the identifier, we always reparse the input. In any case, we never
        // parse the same REPL input twice right now, so caching it is in fact useless. It's
        // just much simpler to reuse the cache infrastructure than to reimplement the whole
        // transformations and import dependency tracking elsewhere.
        let extd_ast = self
            .asts
            .parse_nickel_repl(file_id, self.sources.files.source(file_id))?;

        let (id, ast) = match extd_ast {
            ExtendedTerm::Term(t) => (None, t),
            ExtendedTerm::ToplevelLet(id, t) => (Some(id), t),
        };

        let term = measure_runtime!("runtime:ast_conversion", ast.to_mainline());

        self.terms.insert(
            file_id,
            TermEntry {
                term,
                state: TermEntryState::default(),
                format: InputFormat::Nickel,
            },
        );

        Ok(CacheOp::Done(id))
    }

    /// Parses a source and populates the corresponding entry in the AST cache, or does nothing
    /// if the entry has already been parsed. External input formats are currently parsed
    /// directly to the runtime representation, without going through an AST: the format is
    /// assumed to be [InputFormat::Nickel] in this method. See [Self::parse_to_term] for other
    /// formats.
    ///
    /// # RFC007
    ///
    /// This method only populates the AST cache. The term cache must be filled separately.
    pub fn parse_to_ast(&mut self, file_id: FileId) -> Result<CacheOp<()>, ParseErrors> {
        Self::parse_ast_impl(&mut self.asts, &mut self.sources, file_id)
    }

    /// Parses a source or compiles an AST into the term cache:
    ///
    /// - if the entry is already in the term cache, does nothing.
    /// - if the format is Nickel and there is a corresponding entry in the AST cache, converts
    ///   the parsed AST to a [RichTerm] and puts it in the term cache.
    /// - if the format is Nickel but there is no cached AST, or if the format is not Nickel,
    ///   parses the input directly into the term cache.
    ///
    /// Mostly used during ([RichTerm]-based) import resolution.
    pub fn parse_to_term(
        &mut self,
        file_id: FileId,
        format: InputFormat,
    ) -> Result<CacheOp<()>, ParseErrors> {
        if self.terms.contains(file_id) {
            return Ok(CacheOp::Cached(()));
        }

        let term = if let InputFormat::Nickel = format {
            match self.compile(file_id) {
                Ok(cache_op) => return Ok(cache_op),
                Err(_) => {
                    let alloc = AstAlloc::new();
                    self.sources.parse_nickel(&alloc, file_id)?.to_mainline()
                }
            }
        } else {
            self.sources.parse_other(file_id, format)?
        };

        self.terms.insert(
            file_id,
            TermEntry {
                term,
                state: TermEntryState::default(),
                format,
            },
        );

        Ok(CacheOp::Done(()))
    }

    /// Typechecks an entry of the cache and updates its state accordingly, or does nothing if
    /// the entry has already been typechecked. Requires that the corresponding source has been
    /// parsed. If the source contains imports, [Self::typecheck] recursively typechecks the
    /// imports as well.
    ///
    /// # RFC007
    ///
    /// During the transition period between the old VM and the new bytecode VM, this method
    /// performs typechecking on the new representation [crate::bytecode::ast::Ast].
    pub fn typecheck(
        &mut self,
        file_id: FileId,
        initial_mode: TypecheckMode,
    ) -> Result<CacheOp<()>, AstCacheError<TypecheckError>> {
        let (slice, asts) = self.split_asts();
        asts.typecheck(slice, file_id, initial_mode)
    }

    /// Returns the apparent type of an entry that has been typechecked, with wildcards
    /// substituted.
    pub fn type_of(
        &mut self,
        file_id: FileId,
    ) -> Result<CacheOp<mainline_typ::Type>, AstCacheError<TypecheckError>> {
        let (slice, asts) = self.split_asts();
        asts.type_of(slice, file_id)
    }

    /// Prepares a source for evaluation: parses, typechecks and applies program
    /// transformations, if it was not already done.
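    ///
    /// A typical end-to-end sequence looks roughly like this (a sketch; `main.ncl` is a
    /// hypothetical input file):
    ///
    /// ```rust,ignore
    /// let mut caches = CacheHub::new();
    /// caches.prepare_stdlib()?;
    /// let file_id = caches.sources.add_file("main.ncl", InputFormat::Nickel)?;
    /// // Parses, typechecks and transforms the file and all of its imports.
    /// caches.prepare(file_id)?;
    /// ```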
    pub fn prepare(&mut self, file_id: FileId) -> Result<CacheOp<()>, Error> {
        self.prepare_impl(file_id, true)
    }

    /// Prepares a file for evaluation only. Same as [Self::prepare], but doesn't typecheck the
    /// source.
    pub fn prepare_eval_only(&mut self, file_id: FileId) -> Result<CacheOp<()>, Error> {
        self.prepare_impl(file_id, false)
    }

    /// Common implementation for [Self::prepare] and [Self::prepare_eval_only], which
    /// optionally skips typechecking.
    fn prepare_impl(&mut self, file_id: FileId, typecheck: bool) -> Result<CacheOp<()>, Error> {
        let mut result = CacheOp::Cached(());

        let format = self
            .sources
            .file_paths
            .get(&file_id)
            .and_then(InputFormat::from_source_path)
            .unwrap_or_default();

        if let InputFormat::Nickel = format {
            if let CacheOp::Done(_) = self.parse_to_ast(file_id)? {
                result = CacheOp::Done(());
            }

            if typecheck {
                let (slice, asts) = self.split_asts();

                let typecheck_res = asts
                    .typecheck(slice, file_id, TypecheckMode::Walk)
                    .map_err(|cache_err| {
                        cache_err.unwrap_error(
                            "cache::prepare(): expected source to be parsed before typechecking",
                        )
                    })?;

                if typecheck_res == CacheOp::Done(()) {
                    result = CacheOp::Done(());
                };
            }
        }
        // Non-Nickel terms are currently not parsed as ASTs, but directly as the runtime
        // representation. While the imports of the main file will be parsed to terms by
        // `compile_and_transform` automatically, we do need to ensure that the main file is in
        // the term cache if it's an external format, or `compile_and_transform` will complain.
        else if let CacheOp::Done(_) = self.parse_to_term(file_id, format)? {
            result = CacheOp::Done(());
        }

        let transform_res = self.compile_and_transform(file_id).map_err(|cache_err| {
            cache_err.unwrap_error(
                "cache::prepare(): expected source to be parsed before transformations",
            )
        })?;

        if transform_res == CacheOp::Done(()) {
            result = CacheOp::Done(());
        };

        Ok(result)
    }

    /// Prepares a REPL snippet for evaluation: parses, typechecks and applies program
    /// transformations, if it was not already done. The difference with [Self::prepare] is
    /// that this method also accepts toplevel bindings `let <id> = <value>`.
    ///
    /// Returns the identifier of the toplevel let, if the input is a toplevel let, or `None`
    /// if the input is a standard Nickel expression.
    pub fn prepare_repl(&mut self, file_id: FileId) -> Result<CacheOp<Option<LocIdent>>, Error> {
        let mut done = false;

        let parsed = self.parse_repl(file_id)?;

        done = done || matches!(parsed, CacheOp::Done(_));

        let id = parsed.inner();

        let (slice, asts) = self.split_asts();
        let typecheck_res = asts
            .typecheck(slice, file_id, TypecheckMode::Walk)
            .map_err(|cache_err| {
                cache_err.unwrap_error(
                    "cache::prepare_repl(): expected source to be parsed before typechecking",
                )
            })?;

        if let Some(id) = id {
            let (slice, asts) = self.split_asts();
            asts.add_type_binding(slice, id, file_id).expect(
                "cache::prepare_repl(): expected source to be parsed before augmenting the \
                 type environment",
            );
        }

        done = done || matches!(typecheck_res, CacheOp::Done(_));

        let transform_res = self.compile_and_transform(file_id).map_err(|cache_err| {
            cache_err.unwrap_error(
                "cache::prepare(): expected source to be parsed before transformations",
            )
        })?;

        done = done || matches!(transform_res, CacheOp::Done(_));

        if done {
            Ok(CacheOp::Done(id))
        } else {
            Ok(CacheOp::Cached(id))
        }
    }

    /// Proxy for [TermCache::transform].
    fn transform(
        &mut self,
        file_id: FileId,
    ) -> Result<CacheOp<()>, TermCacheError<UnboundTypeVariableError>> {
        self.terms
            .transform(&self.wildcards, &self.import_data, file_id)
    }

    /// Loads and parses the standard library in the AST cache.
    ///
    /// # RFC007
    ///
    /// This method doesn't populate the term cache. Use [Self::compile_stdlib] afterwards.
    pub fn load_stdlib(&mut self) -> Result<CacheOp<()>, Error> {
        let mut ret = CacheOp::Cached(());

        for (_, file_id) in self.sources.stdlib_modules() {
            if let CacheOp::Done(_) = self.parse_to_ast(file_id)? {
                ret = CacheOp::Done(());
            }
        }

        Ok(ret)
    }

    /// Converts the parsed standard library to the runtime representation.
    pub fn compile_stdlib(&mut self) -> Result<CacheOp<()>, AstCacheError<()>> {
        let mut ret = CacheOp::Cached(());

        for (_, file_id) in self.sources.stdlib_modules() {
            let result = self.compile(file_id).map_err(|cache_err| {
                if let CacheError::IncompatibleState { want } = cache_err {
                    CacheError::IncompatibleState { want }
                } else {
                    unreachable!("unexpected parse error during the compilation of stdlib")
                }
            })?;

            if let CacheOp::Done(_) = result {
                ret = CacheOp::Done(());
            }
        }

        Ok(ret)
    }

    /// Typechecks the standard library. Currently only used in the test suite.
    pub fn typecheck_stdlib(&mut self) -> Result<CacheOp<()>, AstCacheError<TypecheckError>> {
        let (slice, asts) = self.split_asts();
        asts.typecheck_stdlib(slice)
    }

    /// Loads, parses, and compiles the standard library. We don't typecheck it for performance
    /// reasons: this is done in the test suite.
    pub fn prepare_stdlib(&mut self) -> Result<(), Error> {
        #[cfg(debug_assertions)]
        if self.skip_stdlib {
            return Ok(());
        }

        self.load_stdlib()?;
        // unwrap(): we just loaded the stdlib, so it must be parsed in the cache.
        self.compile_stdlib().unwrap();

        self.sources
            .stdlib_modules()
            // We need to handle the internals module separately. Each field
            // is bound directly in the environment without evaluating it first, so we can't
            // tolerate top-level let bindings that would be introduced by `transform`.
            .try_for_each(|(_, file_id)| self.transform(file_id).map(|_| ()))
            .map_err(|cache_err: TermCacheError<UnboundTypeVariableError>| {
                Error::ParseErrors(
                    cache_err
                        .unwrap_error(
                            "cache::prepare_stdlib(): unexpected unbound type variable error during stdlib loading",
                        )
                        .into(),
                )
            })?;

        Ok(())
    }

    /// Applies a custom transform to an input and its imports. [CacheError::IncompatibleState]
    /// is returned if the file has not yet been typechecked.
    ///
    /// If multiple invocations of `custom_transform` are needed, you must supply a
    /// `transform_id` higher than that of all previous invocations.
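    ///
    /// For instance (a sketch; the closure below is a hypothetical no-op transform):
    ///
    /// ```rust,ignore
    /// // First custom pass over `file_id` and its imports.
    /// caches.custom_transform(file_id, 0, &mut |_caches, term| Ok::<_, ()>(term))?;
    /// // A later, distinct pass must use a strictly higher transform id.
    /// caches.custom_transform(file_id, 1, &mut |_caches, term| Ok::<_, ()>(term))?;
    /// ```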
    pub fn custom_transform<E>(
        &mut self,
        file_id: FileId,
        transform_id: usize,
        f: &mut impl FnMut(&mut CacheHub, RichTerm) -> Result<RichTerm, E>,
    ) -> Result<(), TermCacheError<E>> {
        match self.terms.entry_state(file_id) {
            None => Err(CacheError::IncompatibleState {
                want: TermEntryState::Populated,
            }),
            Some(state) => {
                if state.needs_custom_transform(transform_id) {
                    let cached_term = self.terms.terms.remove(&file_id).unwrap();
                    let term = f(self, cached_term.term)?;
                    self.terms.insert(
                        file_id,
                        TermEntry {
                            term,
                            state: TermEntryState::CustomTransforming,
                            ..cached_term
                        },
                    );

                    let imported: Vec<_> = self.import_data.imports(file_id).collect();
                    for file_id in imported {
                        self.custom_transform(file_id, transform_id, f)?;
                    }

                    // TODO: We're setting the state back to whatever it was.
                    // unwrap(): we inserted the term just above
                    let _ = self
                        .terms
                        .update_state(file_id, TermEntryState::CustomTransformed { transform_id })
                        .unwrap();
                }

                Ok(())
            }
        }
    }

    /// Resolves every import of a term entry of the cache and updates its state accordingly,
    /// or does nothing if the imports of the entry have already been resolved or if they
    /// aren't Nickel inputs. Requires that the corresponding source has been parsed.
    ///
    /// If resolved imports contain imports themselves, they are resolved recursively. Returns
    /// the imports that were transitively resolved along the way. Imports that were already
    /// resolved before are not included: this return value is currently used by the LSP to
    /// re-run code analysis on new or modified files.
    ///
    /// The resolved imports are ordered by a pre-order depth-first search. In particular,
    /// earlier elements in the returned list might import later elements but -- unless there
    /// are cyclic imports -- later elements do not import earlier elements.
    ///
    /// This method performs strict import resolution: it returns an `Err(..)` containing the
    /// first import error it encounters instead of accumulating errors.
    ///
    /// # RFC007
    ///
    /// This method is still needed only because the evaluator can't handle unresolved imports,
    /// so we need to replace them with resolved imports. However, actual import resolution
    /// (loading and parsing files for the first time) is now driven by typechecking directly.
    pub fn resolve_imports(
        &mut self,
        file_id: FileId,
    ) -> Result<CacheOp<Vec<FileId>>, TermCacheError<ImportError>> {
        let entry = self.terms.terms.get(&file_id);

        match entry {
            Some(TermEntry {
                state,
                term,
                format: InputFormat::Nickel,
            }) if *state < TermEntryState::ImportsResolving => {
                let term = term.clone();

                let import_resolution::strict::ResolveResult {
                    transformed_term,
                    resolved_ids: pending,
                } = import_resolution::strict::resolve_imports(term, self)?;

                // unwrap(): we matched on a `Some(..)` result of `self.terms.get(&file_id)` at
                // the beginning of the enclosing branch. We only made recursive calls to
                // `resolve_imports` in between, which don't remove anything from `self.terms`.
                let cached_term = self.terms.terms.get_mut(&file_id).unwrap();
                cached_term.term = transformed_term;
                cached_term.state = TermEntryState::ImportsResolving;

                let mut done = Vec::new();

                // Transitively resolve the imports, and accumulate the ids of the resolved
                // files along the way.
                for id in pending {
                    if let CacheOp::Done(mut done_local) = self.resolve_imports(id)? {
                        done.push(id);
                        done.append(&mut done_local)
                    }
                }

                // unwrap(): if we are in this branch, the term is present in the cache
                let _ = self
                    .terms
                    .update_state(file_id, TermEntryState::ImportsResolved)
                    .unwrap();

                Ok(CacheOp::Done(done))
            }
            // There's no import to resolve for non-Nickel inputs. We still update the state.
            Some(TermEntry { state, .. }) if *state < TermEntryState::ImportsResolving => {
                // unwrap(): if we are in this branch, the term is present in the cache
                let _ = self
                    .terms
                    .update_state(file_id, TermEntryState::ImportsResolved)
                    .unwrap();
                Ok(CacheOp::Cached(Vec::new()))
            }
            // [^transitory_entry_state]
            //
            // This case is triggered by a cyclic import. The entry is already
            // being treated by an ongoing call to `resolve_imports` higher up in
            // the call chain, so we don't do anything here.
            //
            // Note that in some cases, this intermediate state can be observed by an
            // external caller: if a first call to `resolve_imports` fails in the middle of
            // resolving the transitive imports, the end state of the entry is
            // `ImportsResolving`. Subsequent calls to `resolve_imports` will succeed, but
            // won't change the state to `EntryState::ImportsResolved` (and for a good
            // reason: we wouldn't even know what the pending imports to resolve are). The
            // Nickel pipeline should however fail if `resolve_imports` failed at some
            // point, anyway.
            Some(TermEntry {
                state: TermEntryState::ImportsResolving,
                ..
            }) => Ok(CacheOp::Done(Vec::new())),
            // >= EntryState::ImportsResolved
            Some(_) => Ok(CacheOp::Cached(Vec::new())),
            None => Err(CacheError::IncompatibleState {
                want: TermEntryState::Populated,
            }),
        }
    }

    /// Generates the initial evaluation environment from the list of file ids corresponding to
    /// the standard library modules.
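    ///
    /// A usage sketch (assuming an eval cache such as
    /// `nickel_lang_core::eval::cache::CacheImpl`, and that the stdlib has been prepared
    /// beforehand):
    ///
    /// ```rust,ignore
    /// caches.prepare_stdlib()?;
    /// let mut eval_cache = CacheImpl::new();
    /// let initial_env = caches.mk_eval_env(&mut eval_cache);
    /// ```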
    pub fn mk_eval_env<EC: EvalCache>(&self, eval_cache: &mut EC) -> eval::Environment {
        let mut eval_env = eval::Environment::new();

        for (module, file_id) in self.sources.stdlib_modules() {
            // The internals module needs special treatment: it's required to be a record
            // literal, and its bindings are added directly to the environment
            if let nickel_stdlib::StdlibModule::Internals = module {
                let result = eval::env_add_record(
                    eval_cache,
                    &mut eval_env,
                    Closure::atomic_closure(self.terms.get_owned(file_id).expect(
                        "cache::mk_eval_env(): can't build environment, stdlib not parsed",
                    )),
                );
                if let Err(eval::EnvBuildError::NotARecord(rt)) = result {
                    panic!(
                        "cache::Caches::mk_eval_env(): \
                        expected the stdlib module {} to be a record, got {:?}",
                        self.sources.name(file_id).to_string_lossy().as_ref(),
                        rt
                    )
                }
            } else {
                eval::env_add(
                    eval_cache,
                    &mut eval_env,
                    module.name().into(),
                    self.terms.get_owned(file_id).expect(
                        "cache::Caches::mk_eval_env(): can't build environment, stdlib not parsed",
                    ),
                    eval::Environment::new(),
                );
            }
        }

        eval_env
    }

    /// Loads a new source as a string, replacing any existing source with the same name.
    ///
    /// If there was a previous source with the same name, its `FileId` is reused and the cached
    /// term is deleted.
    ///
    /// Used to store intermediate, short-lived generated snippets that need to have a
    /// corresponding `FileId`, such as when querying or reporting errors.
    pub fn replace_string(&mut self, source_name: SourcePath, s: String) -> FileId {
        if let Some(file_id) = self.sources.id_of(&source_name) {
            self.sources.files.update(file_id, s);
            self.asts.remove(file_id);
            self.terms.terms.remove(&file_id);
            file_id
        } else {
            let file_id = self.sources.files.add(source_name.clone(), s);
            self.sources
                .file_paths
                .insert(file_id, source_name.clone());
            self.sources.file_ids.insert(
                source_name,
                NameIdEntry {
                    id: file_id,
                    source: SourceKind::Memory,
                },
            );
            file_id
        }
    }

    pub fn closurize<EC: EvalCache>(
        &mut self,
        eval_cache: &mut EC,
        file_id: FileId,
    ) -> Result<CacheOp<()>, TermCacheError<()>> {
        self.terms
            .closurize(eval_cache, &self.import_data, file_id)
    }

    /// Adds the bindings of a record to the REPL type environment. Ignores fields whose names
    /// are defined through interpolation.
    pub fn add_repl_bindings(&mut self, term: &RichTerm) -> Result<(), NotARecord> {
        let (slice, asts) = self.split_asts();
        asts.add_type_bindings(slice, term)
    }

    /// Converts an AST and all of its transitive dependencies to the runtime representation,
    /// populating the term cache. `main_id` and any of its Nickel dependencies must be present
    /// in the AST cache, or [CacheError::IncompatibleState] is returned. Non-Nickel
    /// dependencies, however, are parsed directly into the term cache instead.
    ///
    /// "Compile" is anticipating a bit on RFC007, although this is indeed a lowering of the
    /// AST representation to the runtime representation.
    ///
    /// Compilation doesn't have a proper state associated with it, and thus should always be
    /// coupled with program transformations through [Self::compile_and_transform]. It should
    /// preferably not be observable as an atomic transition, although as far as I can tell,
    /// doing so shouldn't cause major trouble.
    pub fn compile(&mut self, main_id: FileId) -> Result<CacheOp<()>, AstCacheError<ImportError>> {
        if self.terms.contains(main_id) {
            return Ok(CacheOp::Cached(()));
        }

        // We set the format of the main `file_id` to `Nickel`, even if it is not, to require
        // its presence in either the term cache or the ast cache.
        let mut work_stack = vec![ImportTarget {
            file_id: main_id,
            format: InputFormat::default(),
        }];

        while let Some(ImportTarget { file_id, format }) = work_stack.pop() {
            if self.terms.contains(file_id) {
                continue;
            }

            let entry = if let InputFormat::Nickel = format {
                let ast_entry =
                    self.asts
                        .get_entry(file_id)
                        .ok_or(CacheError::IncompatibleState {
                            want: AstEntryState::Parsed,
                        })?;

                TermEntry {
                    term: ast_entry.ast.to_mainline(),
                    format: ast_entry.format,
                    state: TermEntryState::default(),
                }
            } else {
                // We want to maintain the same error message as before the introduction of the
                // two distinct representations and their processing in two stages (first Nickel
                // files that have an AST, and then others before evaluation).
                //
                // If we find a non-Nickel file here that needs to be parsed, it's because it's
                // been imported from somewhere else. The error used to be an import error,
                // which includes the location of the importing expression. We thus raise an
                // import error here, in case of failure.
                let term = self
                    .sources
                    .parse_other(file_id, format)
                    .map_err(|parse_err| {
                        CacheError::Error(ImportError::ParseErrors(
                            parse_err.into(),
                            self.import_data
                                .rev_imports
                                .get(&file_id)
                                .and_then(|map| map.get(&main_id))
                                .copied()
                                .unwrap_or_default(),
                        ))
                    })?;

                TermEntry {
                    term,
                    format,
                    state: TermEntryState::default(),
                }
            };

            self.terms.insert(file_id, entry);

            work_stack.extend(
                self.import_data
                    .imports
                    .get(&file_id)
                    .into_iter()
                    .flat_map(|set| set.iter()),
            )
        }

        Ok(CacheOp::Done(()))
    }

    /// Converts an AST entry and all of its transitive dependencies to the runtime
    /// representation (compile), populating the term cache. Applies both import resolution and
    /// other program transformations on the resulting terms.
    pub fn compile_and_transform(
        &mut self,
        file_id: FileId,
    ) -> Result<CacheOp<()>, AstCacheError<Error>> {
        let mut done = false;

        done = matches!(
            self.compile(file_id)
                .map_err(|cache_err| cache_err.map_err(Error::ImportError))?,
            CacheOp::Done(_)
        ) || done;

        let imports = self
            .resolve_imports(file_id)
            // force_cast(): since we compiled `file_id`, the term cache must be populated, and
            // thus `resolve_imports` should never throw `CacheError::IncompatibleState`.
            .map_err(|cache_err| cache_err.map_err(Error::ImportError).force_cast())?;
        done = matches!(imports, CacheOp::Done(_)) || done;

        let transform = self
            .terms
            .transform(&self.wildcards, &self.import_data, file_id)
            // force_cast(): since we compiled `file_id`, the term cache must be populated, and
            // thus `transform` should never throw `CacheError::IncompatibleState`.
            .map_err(|cache_err| {
                cache_err
                    .map_err(|uvar_err| Error::ParseErrors(ParseErrors::from(uvar_err)))
                    .force_cast()
            })?;
        done = matches!(transform, CacheOp::Done(_)) || done;

        Ok(if done {
            CacheOp::Done(())
        } else {
            CacheOp::Cached(())
        })
    }

    /// Creates a partial copy of this cache for evaluation purposes only. In particular, we
    /// don't copy anything related to arena-allocated ASTs. However, source files, import data
    /// and terms are copied over, which is useful to make new evaluation caches cheaply,
    /// typically for NLS and benches.
    pub fn clone_for_eval(&self) -> Self {
        Self {
            terms: self.terms.clone(),
            sources: self.sources.clone(),
            asts: AstCache::empty(),
            wildcards: self.wildcards.clone(),
            import_data: self.import_data.clone(),
            #[cfg(debug_assertions)]
            skip_stdlib: self.skip_stdlib,
        }
    }

    /// Splits a mutable borrow of self into a mutable borrow of the AST cache and a mutable
    /// borrow of the rest.
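    ///
    /// This is the pattern used throughout this module, e.g. for typechecking (a sketch):
    ///
    /// ```rust,ignore
    /// let (slice, asts) = caches.split_asts();
    /// // `asts` is borrowed mutably while the rest of the cache travels alongside in `slice`.
    /// asts.typecheck(slice, file_id, TypecheckMode::Walk)?;
    /// ```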
1551 pub fn split_asts(&mut self) -> (CacheHubView<'_>, &mut AstCache) {
1552 (
1553 CacheHubView {
1554 terms: &mut self.terms,
1555 sources: &mut self.sources,
1556 wildcards: &mut self.wildcards,
1557 import_data: &mut self.import_data,
1558 #[cfg(debug_assertions)]
1559 skip_stdlib: self.skip_stdlib,
1560 },
1561 &mut self.asts,
1562 )
1563 }
1564
1565 /// See [SourceCache::input_format].
1566 pub fn input_format(&self, file_id: FileId) -> Option<InputFormat> {
1567 self.sources.input_format(file_id)
1568 }
1569}
1570
1571/// Because ASTs are arena-allocated, the self-referential [ast_cache::AstCache] which holds both
1572/// the arena and references to this arena often needs special treatment, if we want to make the
1573/// borrow checker happy. The following structure is basically a view of "everything but the ast
1574/// cache" into [CacheHub], so that we can separate and pack all the rest in a single structure,
1575/// making the signature of many [ast_cache::AstCache] methods much lighter.
1576pub struct CacheHubView<'cache> {
1577 terms: &'cache mut TermCache,
1578 sources: &'cache mut SourceCache,
1579 wildcards: &'cache mut WildcardsCache,
1580 import_data: &'cache mut ImportData,
1581 #[cfg(debug_assertions)]
1582 /// Skip loading the stdlib, used for debugging purpose
1583 skip_stdlib: bool,
1584}
1585
1586impl CacheHubView<'_> {
1587 /// Make a reborrow of this slice.
1588 pub fn reborrow(&mut self) -> CacheHubView<'_> {
1589 CacheHubView {
1590 terms: self.terms,
1591 sources: self.sources,
1592 wildcards: self.wildcards,
1593 import_data: self.import_data,
1594 #[cfg(debug_assertions)]
1595 skip_stdlib: self.skip_stdlib,
1596 }
1597 }
1598}
1599
1600/// An entry in the term cache. Stores the parsed term together with metadata and state.
1601#[derive(Debug, Clone, PartialEq)]
1602pub struct TermEntry {
1603 pub term: RichTerm,
1604 pub state: TermEntryState,
1605 pub format: InputFormat,
1606}
1607
1608/// An entry in the AST cache. Stores the parsed term together with metadata and state.
1609#[derive(Debug, Clone, PartialEq)]
1610pub struct AstEntry<'ast> {
1611 pub ast: &'ast Ast<'ast>,
1612 pub state: AstEntryState,
1613 pub format: InputFormat,
1614}
1615
1616impl<'ast> AstEntry<'ast> {
1617 /// Creates a new entry with default metadata.
1618 pub fn new(ast: &'ast Ast<'ast>) -> Self {
1619 AstEntry {
1620 ast,
1621 state: AstEntryState::default(),
1622 format: InputFormat::default(),
1623 }
1624 }
1625}
1626
1627/// Inputs can be read from the filesystem or from in-memory buffers (which come, e.g., from
1628/// the REPL, the standard library, or the language server).
1629///
1630/// Inputs read from the filesystem get auto-refreshed: if we try to access them again and
1631/// the on-disk file has changed, we read it again. Inputs read from in-memory buffers
1632/// are not auto-refreshed. If an in-memory buffer has a path that also exists in the
1633/// filesystem, we will not even check that file to see if it has changed.
1634///
/// An input that was opened as an in-memory file may be closed, typically when the file is
/// closed or deleted in an editor using the LSP. In this case, the file will be read from the
/// filesystem again instead of using the in-memory value. Closing a file only makes sense when
/// the [SourcePath] refers to a path on the filesystem. Other types of in-memory files, like the
/// standard library, cannot be closed.
1640#[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Copy, Clone)]
1641enum SourceKind {
1642 Filesystem(SystemTime),
1643 Memory,
1644 MemoryClosed,
1645}
1646
/// The errors that can occur while closing an in-memory file.
1648#[derive(Debug, Clone)]
1649pub enum FileCloseError {
1650 /// The file was not closed because no mapping of the source path to a [FileId] could be
1651 /// found.
1652 FileIdNotFound,
1653 /// A file with the given path was found, but it was not open in memory.
1654 FileNotOpen,
1655}
1656
1657impl fmt::Display for FileCloseError {
1658 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1659 match &self {
1660 FileCloseError::FileIdNotFound => {
1661 write!(
1662 f,
1663 "No file ID could be found for the file path to be closed."
1664 )
1665 }
1666 FileCloseError::FileNotOpen => {
1667 write!(f, "Attempted to close a file that was not open in-memory.")
1668 }
1669 }
1670 }
1671}
1672
1673impl std::error::Error for FileCloseError {}
1674
1675/// Contains information about the closed in-memory file and its replacement from the filesystem
1676/// in the case that an in-memory file was closed successfully.
1677pub struct FileCloseResult {
1678 /// The [FileId] of the in-memory file that was closed.
1679 pub closed_id: FileId,
1680 /// The [FileId] of the file loaded from the filesystem with the same path as the closed
1681 /// file, or an error indicating why the file could not be opened.
    /// An error is expected here in the case that the file was deleted, since deletion in the
    /// editor also sends a close-file notification to the LSP.
1684 pub replacement_id: Result<FileId, io::Error>,
1685}
1686
1687/// Cache entries for sources.
1688///
/// A source can be either a snippet input by the user, in which case it is only identified by its
/// name in the name-id table and a unique `FileId`, or a file. Different versions of the same
/// file can coexist during the same session of the REPL. For this reason, an entry of the name-id
/// table for a file also stores the *modified at* timestamp, such that if a file is imported or
/// loaded again and has been modified in between, the entry is invalidated, the content is loaded
/// again and a new `FileId` is generated.
///
/// Note that in that case, invalidation just means that the `FileId` of a previous version is not
/// accessible anymore in the name-id table. However, terms that contain non-evaluated imports or
/// source locations referring to a previous version are still able to access the corresponding
/// source or term, which are kept respectively in `files` and `cache`, by using the corresponding
/// `FileId`.
1700#[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Copy, Clone)]
1701pub struct NameIdEntry {
1702 id: FileId,
1703 source: SourceKind,
1704}
1705
1706/// The state of an entry of the term cache.
1707///
1708/// # Imports
1709///
/// Usually, when applying a procedure to a term entry (e.g. program transformations), we process
/// all of its transitive imports as well. We start by processing the entry, updating the state to
/// `XXXing` (e.g. `Typechecking`) upon success. Only when all the imports have been successfully
/// processed is the state updated to `XXXed` (e.g. `Typechecked`).
1714#[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Copy, Clone, Default)]
1715pub enum TermEntryState {
1716 /// The initial state. The term is in the cache but hasn't been processed further yet.
1717 #[default]
1718 Populated,
1719 /// A custom transformation of the entry (through `Program::custom_transform`) is underway.
1720 CustomTransforming,
    /// This entry has completed all custom transformations with IDs up to and including
    /// `transform_id`.
1722 CustomTransformed { transform_id: usize },
1723 /// The imports of the entry have been resolved, and the imports of its (transitive) imports are
1724 /// being resolved.
1725 ImportsResolving,
    /// The imports of the entry and its transitive dependencies have been resolved.
1727 ImportsResolved,
    /// The entry has been transformed, and its (transitive) imports are being transformed.
1729 Transforming,
1730 /// The entry and its transitive imports have been transformed.
1731 Transformed,
1732 /// The entry has been closurized.
1733 Closurized,
1734}
1735
1736impl TermEntryState {
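    /// Returns `true` if the custom transformation with id `transform_id` still needs to be
    /// applied to an entry in this state: either no custom transformation has been applied yet,
    /// or only transformations with a strictly smaller id have been completed.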
1737 fn needs_custom_transform(&self, transform_id: usize) -> bool {
1738 if let TermEntryState::CustomTransformed {
1739 transform_id: done_transform_id,
1740 } = self
1741 {
1742 transform_id > *done_transform_id
1743 } else {
1744 *self < TermEntryState::CustomTransforming
1745 }
1746 }
1747}
1748
1749/// The state of an entry in the AST cache. Equivalent of [TermEntryState] but for ASTs.
1750#[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Copy, Clone, Default)]
1751pub enum AstEntryState {
1752 /// The initial state. The AST is in the cache but hasn't been processed further yet.
1753 #[default]
1754 Parsed,
    /// The entry has been typechecked, and its (transitive) imports are being typechecked.
1756 Typechecking,
1757 /// The entry and its transitive imports have been typechecked.
1758 Typechecked,
1759}
1760
/// The result of a cache operation, such as parsing, typechecking, etc., which either performed
/// actual work ([CacheOp::Done]) or did nothing because the corresponding entry was already at a
/// later stage ([CacheOp::Cached]).
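///
/// # Example
///
/// A minimal sketch: whether or not work was performed, [CacheOp::inner] extracts the payload.
///
/// ```rust,ignore
/// let op = CacheOp::Done(42);
/// assert_eq!(op.inner(), 42);
/// ```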
1764#[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Copy, Clone)]
1765pub enum CacheOp<T> {
1766 Done(T),
1767 Cached(T),
1768}
1769
1770impl<T> CacheOp<T> {
1771 pub fn inner(self: CacheOp<T>) -> T {
1772 match self {
1773 CacheOp::Done(t) | CacheOp::Cached(t) => t,
1774 }
1775 }
1776}
1777
1778/// Wrapper around other errors to indicate that typechecking or applying program transformations
1779/// failed because the source has not been parsed yet.
1780///
/// # Type parameters
1782///
1783/// - `E`: the underlying, wrapped error type
1784/// - `S`: the entry state, whether [TermEntryState] or [AstEntryState] in practice.
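///
/// # Example
///
/// A minimal sketch: [CacheError::map_err] transforms the wrapped error and leaves
/// [CacheError::IncompatibleState] untouched.
///
/// ```rust,ignore
/// let err: CacheError<&str, ()> = CacheError::Error("oops");
/// assert!(matches!(err.map_err(str::len), CacheError::Error(4)));
/// ```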
1785#[derive(Eq, PartialEq, Debug, Clone)]
1786pub enum CacheError<E, S> {
1787 Error(E),
1788 /// The state of the entry in the cache is incompatible with the requested operation.
1789 IncompatibleState {
1790 want: S,
1791 },
1792}
1793
1794pub type AstCacheError<E> = CacheError<E, AstEntryState>;
1795pub type TermCacheError<E> = CacheError<E, TermEntryState>;
1796
1797impl<E, S> From<E> for CacheError<E, S> {
1798 fn from(e: E) -> Self {
1799 CacheError::Error(e)
1800 }
1801}
1802
1803impl<E, S> CacheError<E, S> {
1804 #[track_caller]
1805 pub fn unwrap_error(self, msg: &str) -> E {
1806 match self {
1807 CacheError::Error(err) => err,
1808 CacheError::IncompatibleState { .. } => panic!("{}", msg),
1809 }
1810 }
1811
1812 pub fn map_err<O>(self, f: impl FnOnce(E) -> O) -> CacheError<O, S> {
1813 match self {
1814 CacheError::Error(e) => CacheError::Error(f(e)),
1815 CacheError::IncompatibleState { want } => CacheError::IncompatibleState { want },
1816 }
1817 }
1818
1819 /// Assuming that `self` is of the form `CacheError::Error(e)`, cast the error type to another
1820 /// arbitrary state type `T`.
1821 ///
1822 /// # Panic
1823 ///
1824 /// This method panics if `self` is [CacheError::IncompatibleState].
1825 #[track_caller]
1826 pub fn force_cast<T>(self) -> CacheError<E, T> {
1827 match self {
1828 CacheError::Error(e) => CacheError::Error(e),
            CacheError::IncompatibleState { want: _ } => {
                panic!("force_cast: unexpected CacheError::IncompatibleState")
            }
1830 }
1831 }
1832}
1833
1834/// Input data usually comes from files on the file system, but there are also lots of cases where
1835/// we want to synthesize other kinds of inputs.
1836///
1837/// Note that a [SourcePath] does not uniquely identify a cached input:
1838///
1839/// - Some functions (like [SourceCache::add_file]) add a new cached input unconditionally.
1840/// - [`SourceCache::get_or_add_file`] will add a new cached input at the same `SourcePath` if the file
1841/// on disk was updated.
1842///
1843/// The equality checking of `SourcePath` only affects [SourceCache::replace_string], which
1844/// overwrites any previous cached input with the same `SourcePath`.
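///
/// # Example
///
/// A minimal sketch of the most common variant, a Nickel file identified by its path (mirroring
/// the unit tests at the end of this module):
///
/// ```rust,ignore
/// let mut sources = SourceCache::new();
/// let path = SourcePath::Path(PathBuf::from("config.ncl"), InputFormat::Nickel);
/// // Registers (or overwrites) an in-memory input under this path.
/// let file_id = sources.replace_string(path, "{ answer = 42 }".to_owned());
/// ```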
1845#[derive(Debug, PartialEq, Eq, Hash, Clone)]
1846pub enum SourcePath {
1847 /// A file at the given path.
1848 ///
1849 /// Note that this does not need to be a real file on the filesystem: it could still be loaded
    /// from memory by, e.g., [`SourceCache::add_string`].
1851 ///
1852 /// This is the only `SourcePath` variant that can be resolved as the target of an import
1853 /// statement.
1854 Path(PathBuf, InputFormat),
1855 /// A subrange of a file at the given path.
1856 ///
1857 /// This is used by NLS to analyze small parts of files that don't fully parse. The original
1858 /// file path is preserved, because it's needed for resolving imports.
1859 Snippet(PathBuf),
1860 Std(StdlibModule),
1861 Query,
1862 ReplInput(usize),
1863 ReplTypecheck,
1864 ReplQuery,
1865 CliFieldAssignment,
1866 Override(FieldPath),
1867 Generated(String),
1868}
1869
1870impl<'a> TryFrom<&'a SourcePath> for &'a OsStr {
1871 type Error = ();
1872
1873 fn try_from(value: &'a SourcePath) -> Result<Self, Self::Error> {
1874 match value {
1875 SourcePath::Path(p, _) | SourcePath::Snippet(p) => Ok(p.as_os_str()),
1876 _ => Err(()),
1877 }
1878 }
1879}
1880
1881// [`Files`] needs to have an OsString for each file, so we synthesize names even for sources that
1882// don't have them. They don't need to be unique; they're just used for diagnostics.
1883impl From<SourcePath> for OsString {
1884 fn from(source_path: SourcePath) -> Self {
1885 match source_path {
1886 SourcePath::Path(p, _) | SourcePath::Snippet(p) => p.into(),
1887 SourcePath::Std(StdlibModule::Std) => "<stdlib/std.ncl>".into(),
1888 SourcePath::Std(StdlibModule::Internals) => "<stdlib/internals.ncl>".into(),
1889 SourcePath::Query => "<query>".into(),
1890 SourcePath::ReplInput(idx) => format!("<repl-input-{idx}>").into(),
1891 SourcePath::ReplTypecheck => "<repl-typecheck>".into(),
1892 SourcePath::ReplQuery => "<repl-query>".into(),
1893 SourcePath::CliFieldAssignment => "<cli-assignment>".into(),
1894 SourcePath::Override(path) => format!("<override {path}>",).into(),
1895 SourcePath::Generated(description) => format!("<generated {description}>").into(),
1896 }
1897 }
1898}
1899
/// Return status indicating whether an import has been resolved from a file (first encounter) or
/// was retrieved from the cache.
1902///
1903/// See [ImportResolver::resolve].
1904#[derive(Debug, PartialEq, Eq)]
1905pub enum ResolvedTerm {
1906 FromFile {
1907 path: PathBuf, /* the loaded path */
1908 },
1909 FromCache,
1910}
1911
1912#[derive(Copy, Clone, Debug, PartialEq, Eq)]
1913pub enum SourceState {
1914 UpToDate(FileId),
1915 /// The source is stale because it came from a file on disk that has since been updated. The
1916 /// data is the timestamp of the new version of the file.
1917 Stale(SystemTime),
1918}
1919
/// Abstracts the access to imported files and the import cache. Used by the evaluator and at the
/// [import resolution](crate::transform::import_resolution) phase.
///
/// The standard implementation uses two caches, the file cache for raw contents and the term
/// cache for parsed contents, mirroring the two steps of resolving an import:
1925///
1926/// 1. When an import is encountered for the first time, the content of the corresponding file is
1927/// read and stored in the file cache (consisting of the file database plus a map between paths
1928/// and ids in the database, the name-id table). The content is parsed, stored in the term
1929/// cache, and queued somewhere so that it can undergo the standard
1930/// [transformations](crate::transform) (including import resolution) later.
1931/// 2. When it is finally processed, the term cache is updated with the transformed term.
1932///
1933/// # RFC007
1934///
1935/// Import resolution on the old representation is still needed only because of the evaluator. The
1936/// typechecker now uses the new AST representation with its own import resolver.
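///
/// # Example
///
/// A minimal sketch of resolving a path import through [CacheHub], which implements this trait
/// (`cache: &mut CacheHub` and a position `pos: TermPos` are assumed to be in scope):
///
/// ```rust,ignore
/// let import = term::Import::Path {
///     path: "dep.ncl".into(),
///     format: InputFormat::Nickel,
/// };
/// // `None`: the import doesn't come from another file (e.g. REPL input).
/// let (status, file_id) = cache.resolve(&import, None, &pos)?;
/// ```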
1937pub trait ImportResolver {
1938 /// Resolves an import.
1939 ///
    /// Reads and stores the content of an import, puts it in the file cache (or gets it from
    /// there if it is cached), then parses it and returns the corresponding term and file id.
    ///
    /// The term and the path are provided only if the import is processed for the first time.
    /// Indeed, at import resolution phase, the term of an import encountered for the first time is
    /// queued to be processed (e.g. having its own imports resolved). The path is needed to
    /// resolve nested imports relative to this parent. Only after this processing is the term
    /// inserted back in the cache. On the other hand, if it has been resolved before, it is
    /// already transformed in the cache and does not need further processing.
1949 fn resolve(
1950 &mut self,
1951 import: &term::Import,
1952 parent: Option<FileId>,
1953 pos: &TermPos,
1954 ) -> Result<(ResolvedTerm, FileId), ImportError>;
1955
1956 /// Return a reference to the file database.
1957 fn files(&self) -> &Files;
1958
1959 /// Get a resolved import from the term cache.
1960 fn get(&self, file_id: FileId) -> Option<RichTerm>;
1961 /// Return the (potentially normalized) file path corresponding to the ID of a resolved import.
1962 fn get_path(&self, file_id: FileId) -> Option<&OsStr>;
1963
1964 /// Returns the base path for Nix evaluation, which is the parent directory of the source file
1965 /// if any, or the current working directory, or an empty path if we couldn't determine any of
1966 /// the previous two.
1967 ///
    /// This method needs to be here because the evaluator makes use of it (when evaluating the
    /// `eval_nix` primop), but at this stage it only has access to the `ImportResolver` interface.
    /// We could give a default implementation here just using [Self::get_path], but we also need
    /// `get_base_dir_for_nix` in [SourceCache]. We reuse the latter implementation instead of
    /// duplicating a more generic variant here.
1973 #[cfg(feature = "nix-experimental")]
1974 fn get_base_dir_for_nix(&self, file_id: FileId) -> PathBuf;
1975}
1976
1977impl ImportResolver for CacheHub {
1978 fn resolve(
1979 &mut self,
1980 import: &term::Import,
1981 parent: Option<FileId>,
1982 pos: &TermPos,
1983 ) -> Result<(ResolvedTerm, FileId), ImportError> {
1984 let (possible_parents, path, pkg_id, format) = match import {
1985 term::Import::Path { path, format } => {
1986 // `parent` is the file that did the import. We first look in its containing directory, followed by
1987 // the directories in the import path.
1988 let mut parent_path = parent
1989 .and_then(|p| self.get_path(p))
1990 .map(PathBuf::from)
1991 .unwrap_or_default();
1992 parent_path.pop();
1993
1994 (
1995 std::iter::once(parent_path)
1996 .chain(self.sources.import_paths.iter().cloned())
1997 .collect(),
1998 Path::new(path),
1999 None,
2000 *format,
2001 )
2002 }
2003 term::Import::Package { id } => {
2004 let package_map = self
2005 .sources
2006 .package_map
2007 .as_ref()
2008 .ok_or(ImportError::NoPackageMap { pos: *pos })?;
2009 let parent_path = parent
2010 .and_then(|p| self.sources.packages.get(&p))
2011 .map(PathBuf::as_path);
2012 let pkg_path = package_map.get(parent_path, *id, *pos)?;
2013 (
2014 vec![pkg_path.to_owned()],
2015 Path::new("main.ncl"),
2016 Some(pkg_path.to_owned()),
2017 // Packages are always in nickel format
2018 InputFormat::Nickel,
2019 )
2020 }
2021 };
2022
2023 // Try to import from all possibilities, taking the first one that succeeds.
2024 let (id_op, path_buf) = possible_parents
2025 .iter()
2026 .find_map(|parent| {
2027 let mut path_buf = parent.clone();
2028 path_buf.push(path);
2029 self.sources
2030 .get_or_add_file(&path_buf, format)
2031 .ok()
2032 .map(|x| (x, path_buf))
2033 })
2034 .ok_or_else(|| {
2035 let parents = possible_parents
2036 .iter()
2037 .map(|p| p.to_string_lossy())
2038 .collect::<Vec<_>>();
2039 ImportError::IOError(
2040 path.to_string_lossy().into_owned(),
2041 format!("could not find import (looked in [{}])", parents.join(", ")),
2042 *pos,
2043 )
2044 })?;
2045
2046 let (result, file_id) = match id_op {
2047 CacheOp::Cached(id) => (ResolvedTerm::FromCache, id),
2048 CacheOp::Done(id) => (ResolvedTerm::FromFile { path: path_buf }, id),
2049 };
2050
2051 if let Some(parent) = parent {
2052 self.import_data
2053 .imports
2054 .entry(parent)
2055 .or_default()
2056 .insert(ImportTarget { file_id, format });
2057 self.import_data
2058 .rev_imports
2059 .entry(file_id)
2060 .or_default()
2061 .entry(parent)
2062 .or_insert(*pos);
2063 }
2064
2065 self.parse_to_term(file_id, format)
2066 .map_err(|err| ImportError::ParseErrors(err, *pos))?;
2067
2068 if let Some(pkg_id) = pkg_id {
2069 self.sources.packages.insert(file_id, pkg_id);
2070 }
2071
2072 Ok((result, file_id))
2073 }
2074
2075 fn files(&self) -> &Files {
2076 &self.sources.files
2077 }
2078
2079 fn get(&self, file_id: FileId) -> Option<RichTerm> {
2080 self.terms
2081 .terms
2082 .get(&file_id)
2083 .map(|TermEntry { term, .. }| term.clone())
2084 }
2085
2086 fn get_path(&self, file_id: FileId) -> Option<&OsStr> {
2087 self.sources
2088 .file_paths
2089 .get(&file_id)
2090 .and_then(|p| p.try_into().ok())
2091 }
2092
2093 #[cfg(feature = "nix-experimental")]
2094 fn get_base_dir_for_nix(&self, file_id: FileId) -> PathBuf {
2095 self.sources.get_base_dir_for_nix(file_id)
2096 }
2097}
2098
2099/// Import resolution for new AST representation (RFC007).
2100pub trait AstImportResolver {
2101 /// Resolves an import to an AST.
2102 ///
2103 /// Reads and stores the content of an import, puts it in the file cache (or gets it from there
2104 /// if it is cached), then parses it and returns the corresponding term and file id.
2105 ///
2106 /// The term and the path are provided only if the import is processed for the first time.
2107 /// Indeed, at import resolution phase, the term of an import encountered for the first time is
2108 /// queued to be processed (e.g. having its own imports resolved). The path is needed to
    /// resolve nested imports relative to this parent. Only after this processing is the term
    /// inserted back in the cache. On the other hand, if it has been resolved before, it is
    /// already transformed in the cache and does not need further processing.
2112 ///
2113 /// # Returns
2114 ///
    /// [Self::resolve] returns `Ok(None)` if the import is in an external format, which can
    /// currently be serialized directly to the runtime representation
    /// ([crate::bytecode::value::NickelValue])
2117 /// without going through an AST. AST import resolution is mostly used by the typechecker, and
2118 /// the typechecker currently ignores external formats anyway.
2119 ///
2120 /// # Lifetimes
2121 ///
2122 /// The signature is parametrized by two different lifetimes. This is due mostly to NLS: in the
2123 /// normal Nickel pipeline, all the ASTs are currently allocated in the same arena, and their
    /// lifetime is the same. However, in NLS, each file needs to be managed separately. At the
2125 /// import boundary, we're thus not guaranteed to get an AST that lives as long as the one
2126 /// being currently typechecked.
2127 fn resolve<'ast_out>(
2128 &'ast_out mut self,
2129 import: &ast::Import<'_>,
2130 pos: &TermPos,
2131 ) -> Result<Option<&'ast_out Ast<'ast_out>>, ImportError>;
2132}
2133
2134/// Normalize the path of a file for unique identification in the cache.
2135///
2136/// The returned path will be an absolute path.
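///
/// # Example
///
/// A minimal sketch: a relative path is first joined to the current working directory (assumed
/// to be `/home/user` here), then normalized.
///
/// ```rust,ignore
/// // Assuming the current working directory is /home/user:
/// assert_eq!(
///     normalize_path("nickel/../config.ncl")?,
///     PathBuf::from("/home/user/config.ncl")
/// );
/// ```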
2137pub fn normalize_path(path: impl Into<PathBuf>) -> std::io::Result<PathBuf> {
2138 let mut path = path.into();
2139 if path.is_relative() {
2140 path = std::env::current_dir()?.join(path);
2141 }
2142 Ok(normalize_abs_path(&path))
2143}
2144
2145/// Normalize the path (assumed to be absolute) of a file for unique identification in the cache.
2146///
2147/// This implementation (including the comment below) was taken from cargo-util.
2148///
2149/// CAUTION: This does not resolve symlinks (unlike [`std::fs::canonicalize`]). This may cause
2150/// incorrect or surprising behavior at times. This should be used carefully. Unfortunately,
2151/// [`std::fs::canonicalize`] can be hard to use correctly, since it can often fail, or on Windows
2152/// returns annoying device paths. This is a problem Cargo needs to improve on.
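///
/// # Example
///
/// A minimal sketch: `.` components are dropped and `..` components pop their parent, without
/// touching the filesystem.
///
/// ```rust,ignore
/// assert_eq!(
///     normalize_abs_path(Path::new("/a/./b/../c.ncl")),
///     PathBuf::from("/a/c.ncl")
/// );
/// ```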
2153pub fn normalize_abs_path(path: &Path) -> PathBuf {
2154 use std::path::Component;
2155
2156 let mut components = path.components().peekable();
2157 let mut ret = if let Some(c @ Component::Prefix(..)) = components.peek().cloned() {
2158 components.next();
2159 PathBuf::from(c.as_os_str())
2160 } else {
2161 PathBuf::new()
2162 };
2163
2164 for component in components {
2165 match component {
2166 Component::Prefix(..) => unreachable!(),
2167 Component::RootDir => {
2168 ret.push(component.as_os_str());
2169 }
2170 Component::CurDir => {}
2171 Component::ParentDir => {
2172 ret.pop();
2173 }
2174 Component::Normal(c) => {
2175 ret.push(c);
2176 }
2177 }
2178 }
2179 ret
2180}
2181
2182/// Normalize a relative path, removing mid-path `..`s.
2183///
2184/// Like [`normalize_abs_path`], this works only on the path itself (i.e. not the filesystem) and
2185/// does not follow symlinks.
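///
/// # Example
///
/// Mirroring the unit tests at the end of this module: only mid-path `..`s are removed, while
/// leading ones are preserved.
///
/// ```rust,ignore
/// assert_eq!(normalize_rel_path(Path::new("../a/../b")), PathBuf::from("../b"));
/// ```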
2186pub fn normalize_rel_path(path: &Path) -> PathBuf {
2187 use std::path::Component;
2188
2189 let mut components = path.components().peekable();
2190 let mut parents = PathBuf::new();
2191 let mut ret = if let Some(c @ Component::Prefix(..)) = components.peek().cloned() {
2192 components.next();
2193 PathBuf::from(c.as_os_str())
2194 } else {
2195 PathBuf::new()
2196 };
2197
2198 for component in components {
2199 match component {
2200 Component::Prefix(..) => unreachable!(),
2201 Component::RootDir => {
2202 ret.push(component.as_os_str());
2203 }
2204 Component::CurDir => {}
2205 Component::ParentDir => {
2206 if !ret.pop() {
2207 parents.push(Component::ParentDir);
2208 }
2209 }
2210 Component::Normal(c) => {
2211 ret.push(c);
2212 }
2213 }
2214 }
2215 parents.extend(ret.components());
2216 parents
2217}
2218
/// Returns the timestamp of a file, or an error if an IO error occurred.
2220pub fn timestamp(path: impl AsRef<OsStr>) -> io::Result<SystemTime> {
2221 fs::metadata(path.as_ref())?.modified()
2222}
2223
2224/// As RFC007 is being rolled out, the typechecker now needs to operate on the new AST. We need a
2225/// structure that implements [AstImportResolver].
2226///
2227/// For borrowing reasons, this can't be all of [CacheHub] or all of [ast_cache::AstCache], as we
2228/// need to split the different things that are borrowed mutably or immutably. `AstResolver` is a
/// structure that borrows some parts of the cache during its lifetime and will retrieve already
2230/// imported ASTs, or register the newly imported ones in a separate hashmap that can be added back
2231/// to the original cache once import resolution is done.
2232pub struct AstResolver<'ast, 'cache> {
2233 /// The AST allocator used to parse new sources.
2234 alloc: &'ast AstAlloc,
2235 /// The AST cache, which is added to as import resolution progresses.
2236 asts: &'cache mut HashMap<FileId, AstEntry<'ast>>,
2237 /// The source cache where new sources will be stored.
2238 sources: &'cache mut SourceCache,
2239 /// Direct and reverse dependencies of files (with respect to imports).
2240 import_data: &'cache mut ImportData,
2241}
2242
2243impl<'ast, 'cache> AstResolver<'ast, 'cache> {
2244 /// Create a new `AstResolver` from an allocator, an ast cache and a cache hub slice.
2245 pub fn new(
2246 alloc: &'ast AstAlloc,
2247 asts: &'cache mut HashMap<FileId, AstEntry<'ast>>,
2248 slice: CacheHubView<'cache>,
2249 ) -> Self {
2250 Self {
2251 alloc,
2252 asts,
2253 sources: slice.sources,
2254 import_data: slice.import_data,
2255 }
2256 }
2257}
2258
2259impl AstImportResolver for AstResolver<'_, '_> {
2260 fn resolve(
2261 &mut self,
2262 import: &ast::Import<'_>,
2263 pos: &TermPos,
2264 ) -> Result<Option<&Ast<'_>>, ImportError> {
2265 let parent_id = pos.src_id();
2266
2267 let (possible_parents, path, pkg_id, format) = match import {
2268 ast::Import::Path { path, format } => {
2269 // `parent` is the file that did the import. We first look in its containing
2270 // directory, followed by the directories in the import path.
2271 let parent_path = parent_id
2272 .and_then(|parent| self.sources.file_paths.get(&parent))
2273 .and_then(|path| <&OsStr>::try_from(path).ok())
2274 .map(PathBuf::from)
2275 .map(|mut path| {
2276 path.pop();
2277 path
2278 })
2279 // If the parent isn't a proper file, we look in the current directory instead.
2280 // This is useful when importing e.g. from the REPL or the CLI directly.
2281 .unwrap_or_default();
2282
2283 (
2284 std::iter::once(parent_path)
2285 .chain(self.sources.import_paths.iter().cloned())
2286 .collect(),
2287 Path::new(path),
2288 None,
2289 *format,
2290 )
2291 }
2292 ast::Import::Package { id } => {
2293 let package_map = self
2294 .sources
2295 .package_map
2296 .as_ref()
2297 .ok_or(ImportError::NoPackageMap { pos: *pos })?;
2298 let parent_path = parent_id
2299 .and_then(|p| self.sources.packages.get(&p))
2300 .map(PathBuf::as_path);
2301 let pkg_path = package_map.get(parent_path, *id, *pos)?;
2302 (
2303 vec![pkg_path.to_owned()],
2304 Path::new("main.ncl"),
2305 Some(pkg_path.to_owned()),
2306 // Packages are always in nickel format
2307 InputFormat::Nickel,
2308 )
2309 }
2310 };
2311
2312 // Try to import from all possibilities, taking the first one that succeeds.
2313 let id_op = possible_parents
2314 .iter()
2315 .find_map(|parent| {
2316 let mut path_buf = parent.clone();
2317 path_buf.push(path);
2318 self.sources.get_or_add_file(&path_buf, format).ok()
2319 })
2320 .ok_or_else(|| {
2321 let parents = possible_parents
2322 .iter()
2323 .map(|p| p.to_string_lossy())
2324 .collect::<Vec<_>>();
2325 ImportError::IOError(
2326 path.to_string_lossy().into_owned(),
2327 format!("could not find import (looked in [{}])", parents.join(", ")),
2328 *pos,
2329 )
2330 })?;
2331
2332 let file_id = id_op.inner();
2333
2334 if let Some(parent_id) = parent_id {
2335 self.import_data
2336 .imports
2337 .entry(parent_id)
2338 .or_default()
2339 .insert(ImportTarget { file_id, format });
2340 self.import_data
2341 .rev_imports
2342 .entry(file_id)
2343 .or_default()
2344 .entry(parent_id)
2345 .or_insert(*pos);
2346 }
2347
2348 if let Some(pkg_id) = pkg_id {
2349 self.sources.packages.insert(file_id, pkg_id);
2350 }
2351
2352 if let InputFormat::Nickel = format {
2353 if let Some(entry) = self.asts.get(&file_id) {
2354 Ok(Some(entry.ast))
2355 } else {
2356 let ast = parse_nickel(self.alloc, file_id, self.sources.files.source(file_id))
2357 .map_err(|parse_err| ImportError::ParseErrors(parse_err, *pos))?;
2358 let ast = self.alloc.alloc(ast);
2359 self.asts.insert(file_id, AstEntry::new(ast));
2360
2361 Ok(Some(ast))
2362 }
2363 } else {
            // Currently, non-Nickel files are just ignored at the AST stage. They are parsed
            // later directly into the runtime representation.
2366 Ok(None)
2367 }
2368 }
2369}
2370
2371/// Provide mockup import resolvers for testing purpose.
2372pub mod resolvers {
2373 use super::*;
2374 use crate::term::Import;
2375
2376 /// A dummy resolver that panics when asked to do something. Used to test code that contains no
2377 /// import.
2378 pub struct DummyResolver {}
2379
2380 impl ImportResolver for DummyResolver {
2381 fn resolve(
2382 &mut self,
2383 _import: &Import,
2384 _parent: Option<FileId>,
2385 _pos: &TermPos,
2386 ) -> Result<(ResolvedTerm, FileId), ImportError> {
2387 panic!("cache::resolvers: dummy resolver should not have been invoked");
2388 }
2389
2390 fn files(&self) -> &Files {
2391 panic!("cache::resolvers: dummy resolver should not have been invoked");
2392 }
2393
2394 fn get(&self, _file_id: FileId) -> Option<RichTerm> {
2395 panic!("cache::resolvers: dummy resolver should not have been invoked");
2396 }
2397
2398 fn get_path(&self, _file_id: FileId) -> Option<&OsStr> {
2399 panic!("cache::resolvers: dummy resolver should not have been invoked");
2400 }
2401
2402 #[cfg(feature = "nix-experimental")]
2403 fn get_base_dir_for_nix(&self, _file_id: FileId) -> PathBuf {
2404 panic!("cache::resolvers: dummy resolver should not have been invoked");
2405 }
2406 }
2407
2408 /// Resolve imports from a mockup file database. Used to test imports without accessing the
    /// file system. File names are stored as strings, and silently converted from/to `OsString`
    /// when needed: don't use this resolver with source code that imports non-UTF-8 paths.
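    ///
    /// # Example
    ///
    /// A minimal sketch of setting up mockup imports for a test:
    ///
    /// ```rust,ignore
    /// let mut resolver = SimpleResolver::new();
    /// resolver.add_source("dep.ncl".to_owned(), "1 + 1".to_owned());
    /// // `resolver` can now serve `import "dep.ncl"` to code under test.
    /// ```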
2411 #[derive(Clone, Default)]
2412 pub struct SimpleResolver {
2413 files: Files,
2414 file_cache: HashMap<String, FileId>,
2415 term_cache: HashMap<FileId, RichTerm>,
2416 }
2417
2418 impl SimpleResolver {
2419 pub fn new() -> SimpleResolver {
2420 SimpleResolver::default()
2421 }
2422
2423 /// Add a mockup file to available imports.
2424 pub fn add_source(&mut self, name: String, source: String) {
2425 let id = self.files.add(name.clone(), source);
2426 self.file_cache.insert(name, id);
2427 }
2428 }
2429
2430 impl ImportResolver for SimpleResolver {
2431 fn resolve(
2432 &mut self,
2433 import: &Import,
2434 _parent: Option<FileId>,
2435 pos: &TermPos,
2436 ) -> Result<(ResolvedTerm, FileId), ImportError> {
2437 let Import::Path { path, .. } = import else {
2438 panic!("simple resolver doesn't support packages");
2439 };
2440
2441 let file_id = self
2442 .file_cache
2443 .get(path.to_string_lossy().as_ref())
2444 .copied()
2445 .ok_or_else(|| {
2446 ImportError::IOError(
2447 path.to_string_lossy().into_owned(),
2448 String::from("Import not found by the mockup resolver."),
2449 *pos,
2450 )
2451 })?;
2452
2453 if let hash_map::Entry::Vacant(e) = self.term_cache.entry(file_id) {
2454 let buf = self.files.source(file_id);
2455 let alloc = AstAlloc::new();
2456
2457 let ast = parser::grammar::TermParser::new()
2458 .parse_strict(&alloc, file_id, Lexer::new(buf))
2459 .map_err(|e| ImportError::ParseErrors(e, *pos))?;
2460 e.insert(ast.to_mainline());
2461
2462 Ok((
2463 ResolvedTerm::FromFile {
2464 path: PathBuf::new(),
2465 },
2466 file_id,
2467 ))
2468 } else {
2469 Ok((ResolvedTerm::FromCache, file_id))
2470 }
2471 }
2472
2473 fn files(&self) -> &Files {
2474 &self.files
2475 }
2476
2477 fn get(&self, file_id: FileId) -> Option<RichTerm> {
2478 self.term_cache.get(&file_id).cloned()
2479 }
2480
2481 fn get_path(&self, file_id: FileId) -> Option<&OsStr> {
2482 Some(self.files.name(file_id))
2483 }
2484
2485 #[cfg(feature = "nix-experimental")]
2486 fn get_base_dir_for_nix(&self, file_id: FileId) -> PathBuf {
2487 self.get_path(file_id)
2488 .and_then(|path| Path::new(path).parent())
2489 .map(PathBuf::from)
2490 .unwrap_or_default()
2491 }
2492 }
2493}
2494
2495/// Parses a Nickel expression from a string.
2496fn parse_nickel<'ast>(
2497 alloc: &'ast AstAlloc,
2498 file_id: FileId,
2499 source: &str,
2500) -> Result<Ast<'ast>, ParseErrors> {
2501 let ast = measure_runtime!(
2502 "runtime:parse:nickel",
2503 parser::grammar::TermParser::new().parse_strict(alloc, file_id, Lexer::new(source))?
2504 );
2505
2506 Ok(ast)
2507}
2508
/// Parses a Nickel REPL input. In addition to normal Nickel expressions, it can be a top-level let.
2510fn parse_nickel_repl<'ast>(
2511 alloc: &'ast AstAlloc,
2512 file_id: FileId,
2513 source: &str,
2514) -> Result<ExtendedTerm<Ast<'ast>>, ParseErrors> {
2515 let et = measure_runtime!(
2516 "runtime:parse:nickel",
2517 parser::grammar::ExtendedTermParser::new().parse_strict(
2518 alloc,
2519 file_id,
2520 Lexer::new(source)
2521 )?
2522 );
2523
2524 Ok(et)
2525}
2526
2527/// AST cache (for the new [crate::bytecode::ast::Ast]) that holds the owned allocator of the AST
2528/// nodes.
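///
/// # Example
///
/// A minimal sketch of parsing a source into the cache (assuming a valid `file_id` registered in
/// the source cache):
///
/// ```rust,ignore
/// let mut asts = AstCache::empty();
/// let ast = asts.parse_nickel(file_id, "1 + 1")?;
/// ```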
2529mod ast_cache {
2530 use super::*;
2531 /// The AST cache packing together the AST allocator and the cached ASTs.
2532 #[self_referencing]
2533 pub struct AstCache {
2534 /// The allocator hosting AST nodes.
2535 alloc: AstAlloc,
2536 /// An AST for each file we have cached.
2537 #[borrows(alloc)]
2538 #[covariant]
2539 asts: HashMap<FileId, AstEntry<'this>>,
        /// The initial typing context. It's morally an option (uninitialized at first), but we just
2541 /// use an empty context as a default value.
2542 ///
        /// This context can be augmented through [AstCache::add_type_binding] and
        /// [AstCache::add_type_bindings], which are typically used in the REPL to add top-level
        /// bindings.
2546 #[borrows(alloc)]
2547 #[not_covariant]
2548 type_ctxt: typecheck::Context<'this>,
2549 }
2550
2551 impl AstCache {
        /// Constructs a new, empty AST cache.
2553 pub fn empty() -> Self {
2554 AstCache::new(
2555 AstAlloc::new(),
2556 |_alloc| HashMap::new(),
2557 |_alloc| typecheck::Context::new(),
2558 )
2559 }
2560
2561 /// Clears the allocator and the cached ASTs.
2562 pub fn clear(&mut self) {
2563 *self = Self::empty();
2564 }
2565
2566 /// Returns `true` if the AST cache contains an entry for the given file id.
2567 pub fn contains(&self, file_id: FileId) -> bool {
2568 self.borrow_asts().contains_key(&file_id)
2569 }
2570
2571 /// Returns the underlying allocator, which might be required to call various helpers.
2572 pub fn get_alloc(&self) -> &AstAlloc {
2573 self.borrow_alloc()
2574 }
2575
2576 /// Returns a reference to a cached AST.
2577 pub fn get(&self, file_id: FileId) -> Option<&Ast<'_>> {
2578 self.borrow_asts().get(&file_id).map(|entry| entry.ast)
2579 }
2580
2581 /// Returns a reference to a cached AST entry.
2582 pub fn get_entry(&self, file_id: FileId) -> Option<&AstEntry<'_>> {
2583 self.borrow_asts().get(&file_id)
2584 }
2585
2586 /// Retrieves the state of an entry. Returns `None` if the entry is not in the AST cache.
2587 pub fn entry_state(&self, file_id: FileId) -> Option<AstEntryState> {
2588 self.borrow_asts()
2589 .get(&file_id)
2590 .map(|AstEntry { state, .. }| *state)
2591 }
2592
2593 /// Updates the state of an entry and returns the previous state, or an error if the entry
2594 /// isn't in the cache.
2595 pub fn update_state(
2596 &mut self,
2597 file_id: FileId,
2598 new: AstEntryState,
2599 ) -> Result<AstEntryState, TermNotFound> {
2600 self.with_asts_mut(|asts| {
2601 asts.get_mut(&file_id)
2602 .map(|AstEntry { state, .. }| std::mem::replace(state, new))
2603 })
2604 .ok_or(TermNotFound)
2605 }
2606
2607 /// Parses a Nickel expression and stores the corresponding AST in the cache.
2608 pub fn parse_nickel<'ast>(
2609 &'ast mut self,
2610 file_id: FileId,
2611 source: &str,
2612 ) -> Result<&'ast Ast<'ast>, ParseErrors> {
2613 self.with_mut(|slf| {
2614 let ast = parse_nickel(slf.alloc, file_id, source)?;
2615 let ast = slf.alloc.alloc(ast);
2616 slf.asts.insert(file_id, AstEntry::new(ast));
2617
2618 Ok(ast)
2619 })
2620 }
2621
2622 /// Same as [Self::parse_nickel] but accepts the extended syntax allowed in the REPL.
2623 ///
2624 /// **Caution**: this method doesn't cache the potential id of a top-level let binding,
2625 /// although it does save the bound expression, which is required later for typechecking,
2626 /// program transformation, etc.
2627 pub fn parse_nickel_repl<'ast>(
2628 &'ast mut self,
2629 file_id: FileId,
2630 source: &str,
2631 ) -> Result<ExtendedTerm<Ast<'ast>>, ParseErrors> {
2632 self.with_mut(|slf| {
2633 let extd_ast = parse_nickel_repl(slf.alloc, file_id, source)?;
2634
2635 let ast = match &extd_ast {
2636 ExtendedTerm::Term(t) | ExtendedTerm::ToplevelLet(_, t) => {
2637 slf.alloc.alloc(t.clone())
2638 }
2639 };
2640
2641 slf.asts.insert(file_id, AstEntry::new(ast));
2642
2643 Ok(extd_ast)
2644 })
2645 }
2646
2647 pub fn remove(&mut self, file_id: FileId) -> Option<AstEntry<'_>> {
2648 self.with_asts_mut(|asts| asts.remove(&file_id))
2649 }
2650
2651 /// Typechecks an entry of the cache and updates its state accordingly, or does nothing if
        /// parsed. Note that this method currently fails on non-Nickel files, which can't have
        /// been parsed to an AST.
2654 /// parsed to an AST.
2655 ///
        /// If the source contains imports, recursively typechecks the imports too.
2657 ///
2658 /// # RFC007
2659 ///
2660 /// During the transition period between the old VM and the new bytecode VM, this method
2661 /// performs typechecking on the new representation [crate::bytecode::ast::Ast], and is also
        /// responsible for then converting the term to the legacy representation and populating the
2663 /// corresponding term cache.
2664 pub fn typecheck(
2665 &mut self,
2666 mut slice: CacheHubView<'_>,
2667 file_id: FileId,
2668 initial_mode: TypecheckMode,
2669 ) -> Result<CacheOp<()>, AstCacheError<TypecheckError>> {
2670 let Some(state) = self.entry_state(file_id) else {
2671 return Err(CacheError::IncompatibleState {
2672 want: AstEntryState::Parsed,
2673 });
2674 };
2675
2676 // If we're already typechecking or we have typechecked the file, we stop right here.
2677 if state >= AstEntryState::Typechecking {
2678 return Ok(CacheOp::Cached(()));
2679 }
2680
2681 // Protect against cycles in the import graph.
2682 // unwrap(): we checked at the beginning of this function that the term is in the
2683 // cache.
2684 let _ = self
2685 .update_state(file_id, AstEntryState::Typechecking)
2686 .unwrap();
2687
2688 // Ensure the initial typing context is properly initialized.
2689 self.populate_type_ctxt(slice.sources);
2690 self.with_mut(|slf| -> Result<(), AstCacheError<TypecheckError>> {
2691 // unwrap(): we checked at the beginning of this function that the AST cache has an
2692 // entry for `file_id`.
2693 let ast = slf.asts.get(&file_id).unwrap().ast;
2694
2695 let mut resolver = AstResolver::new(slf.alloc, slf.asts, slice.reborrow());
2696 let type_ctxt = slf.type_ctxt.clone();
2697 let wildcards_map = measure_runtime!(
2698 "runtime:type_check",
2699 typecheck(slf.alloc, ast, type_ctxt, &mut resolver, initial_mode)?
2700 );
2701 slice.wildcards.wildcards.insert(
2702 file_id,
2703 wildcards_map.iter().map(ToMainline::to_mainline).collect(),
2704 );
2705 Ok(())
2706 })?;
2707
2708 // Typecheck dependencies (files imported by this file).
2709 if let Some(imports) = slice.import_data.imports.get(&file_id) {
2710 // Because we need to borrow `import_data` for typechecking, we need to release the
2711 // borrow by moving the content of `imports` somewhere else.
2712 //
2713 // We ignore non-Nickel imports, which aren't typechecked, and are currently not
2714 // even in the AST cache.
2715 let imports: Vec<_> = imports
2716 .iter()
2717 .filter_map(|tgt| {
2718 if let InputFormat::Nickel = tgt.format {
2719 Some(tgt.file_id)
2720 } else {
2721 None
2722 }
2723 })
2724 .collect();
2725
2726 for file_id in imports {
2727 self.typecheck(slice.reborrow(), file_id, initial_mode)?;
2728 }
2729 }
2730
2731 // unwrap(): we checked at the beginning of this function that the AST is in the
2732 // cache.
2733 let _ = self
2734 .update_state(file_id, AstEntryState::Typechecked)
2735 .unwrap();
2736
2737 Ok(CacheOp::Done(()))
2738 }
2739
        /// Typechecks the stdlib. This has to be public because it's used in benches. It probably
        /// shouldn't be used for anything else.
2742 pub fn typecheck_stdlib(
2743 &mut self,
2744 mut slice: CacheHubView<'_>,
2745 ) -> Result<CacheOp<()>, AstCacheError<TypecheckError>> {
2746 let mut ret = CacheOp::Cached(());
2747 self.populate_type_ctxt(slice.sources);
2748
2749 for (_, stdlib_module_id) in slice.sources.stdlib_modules() {
2750 let result =
2751 self.typecheck(slice.reborrow(), stdlib_module_id, TypecheckMode::Walk)?;
2752
2753 if let CacheOp::Done(()) = result {
2754 ret = CacheOp::Done(());
2755 }
2756 }
2757
2758 Ok(ret)
2759 }
2760
2761 /// Typechecks a file (if it wasn't already) and returns the inferred type, with type
2762 /// wildcards properly substituted.
2763 pub fn type_of(
2764 &mut self,
2765 mut slice: CacheHubView<'_>,
2766 file_id: FileId,
2767 ) -> Result<CacheOp<mainline_typ::Type>, AstCacheError<TypecheckError>> {
2768 self.typecheck(slice.reborrow(), file_id, TypecheckMode::Walk)?;
2769
2770 let typ: Result<ast::typ::Type<'_>, AstCacheError<TypecheckError>> =
2771 self.with_mut(|slf| {
2772 let ast = slf
2773 .asts
2774 .get(&file_id)
2775 .ok_or(CacheError::IncompatibleState {
2776 want: AstEntryState::Parsed,
2777 })?
2778 .ast;
2779
2780 let mut resolver = AstResolver::new(slf.alloc, slf.asts, slice.reborrow());
2781 let type_ctxt = slf.type_ctxt.clone();
2782
2783 let typ = TryConvert::try_convert(
2784 slf.alloc,
2785 ast.apparent_type(
2786 slf.alloc,
2787 Some(&type_ctxt.type_env),
2788 Some(&mut resolver),
2789 ),
2790 )
2791 .unwrap_or(ast::typ::TypeF::Dyn.into());
2792 Ok(typ)
2793 });
2794 let typ = typ?;
2795
2796 let target: mainline_typ::Type = typ.to_mainline();
2797
2798 // unwrap(): we ensured that the file is typechecked, thus its wildcards and its AST
2799 // must be populated
2800 let wildcards = slice.wildcards.get(file_id).unwrap();
2801
2802 Ok(CacheOp::Done(
2803 target
2804 .traverse(
2805 &mut |ty: mainline_typ::Type| -> Result<_, std::convert::Infallible> {
2806 if let mainline_typ::TypeF::Wildcard(id) = ty.typ {
2807 Ok(wildcards
2808 .get(id)
2809 .cloned()
2810 .unwrap_or(mainline_typ::Type::from(mainline_typ::TypeF::Dyn)))
2811 } else {
2812 Ok(ty)
2813 }
2814 },
2815 TraverseOrder::TopDown,
2816 )
2817 .unwrap(),
2818 ))
2819 }
2820
2821 /// If the type context hasn't been created yet, generate and cache the initial typing
2822 /// context from the list of `file_ids` corresponding to the standard library parts.
2823 /// Otherwise, do nothing.
2824 fn populate_type_ctxt(&mut self, sources: &SourceCache) {
2825 self.with_mut(|slf| {
2826 if !slf.type_ctxt.is_empty() {
2827 return;
2828 }
2829 let stdlib_terms_vec: Vec<(StdlibModule, &'_ Ast<'_>)> = sources
2830 .stdlib_modules()
2831 .map(|(module, file_id)| {
2832 let ast = slf.asts.get(&file_id).map(|entry| entry.ast);
2833
2834 (
2835 module,
2836 ast.expect("cache::ast_cache::AstCache::populate_type_ctxt(): can't build environment, stdlib not parsed")
2837 )
2838 })
2839 .collect();
2840
2841 *slf.type_ctxt = typecheck::mk_initial_ctxt(slf.alloc, stdlib_terms_vec).unwrap();
2842 });
2843 }
2844
2845 /// Adds a binding to the type environment. The bound term is identified by its file id
2846 /// `file_id`.
2847 pub fn add_type_binding(
2848 &mut self,
2849 mut slice: CacheHubView<'_>,
2850 id: LocIdent,
2851 file_id: FileId,
2852 ) -> Result<(), AstCacheError<std::convert::Infallible>> {
2853 self.with_mut(|slf| {
2854 let Some(entry) = slf.asts.get(&file_id) else {
2855 return Err(CacheError::IncompatibleState {
2856 want: AstEntryState::Parsed,
2857 });
2858 };
2859
2860 let ast = entry.ast;
2861 let mut resolver = AstResolver::new(slf.alloc, slf.asts, slice.reborrow());
2862
2863 typecheck::env_add(
2864 slf.alloc,
2865 &mut slf.type_ctxt.type_env,
2866 id,
2867 ast,
2868 &slf.type_ctxt.term_env,
2869 &mut resolver,
2870 );
2872
2873 slf.type_ctxt
2874 .term_env
2875 .0
2876 .insert(id.ident(), (ast.clone(), slf.type_ctxt.term_env.clone()));
2877 Ok(())
2878 })?;
2879
2880 Ok(())
2881 }
2882
        /// Adds the bindings of a record to the type environment. Ignores fields whose names are
        /// defined through interpolation.
2885 pub fn add_type_bindings(
2886 &mut self,
2887 mut slice: CacheHubView<'_>,
2888 term: &RichTerm,
2889 ) -> Result<(), NotARecord> {
2890 self.with_mut(|slf| {
2891 // It's sad, but for now, we have to convert the term back to an AST to insert it in
2892 // the type environment.
2893 let ast = term.to_ast(slf.alloc);
2894 let mut resolver = AstResolver::new(slf.alloc, slf.asts, slice.reborrow());
2895
2896 let ret = typecheck::env_add_term(
2897 slf.alloc,
2898 &mut slf.type_ctxt.type_env,
2899 ast,
2900 &slf.type_ctxt.term_env,
2901 &mut resolver,
2902 )
2903 .map_err(|_| NotARecord);
2904 ret
2905 })
2906 }
2907 }
2908}
2909
2910#[cfg(test)]
2911mod tests {
2912 use std::path::Path;
2913
2914 use super::*;
2915
2916 #[test]
2917 fn normalize_rel() {
2918 assert_eq!(
2919 &normalize_rel_path(Path::new("../a/../b")),
2920 Path::new("../b")
2921 );
2922 assert_eq!(
2923 &normalize_rel_path(Path::new("../../a/../b")),
2924 Path::new("../../b")
2925 );
2926 }
2927
2928 #[test]
2929 fn get_cached_source_with_relative_path() {
2930 let mut sources = SourceCache::new();
2931 let root_path = PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("nickel-test-rootdir");
2932 let path = SourcePath::Path(root_path.join("file.ncl"), super::InputFormat::Nickel);
2933 let file_id = sources.replace_string(path, "1".into());
2934
        // This path should not exist on the host, but it should match the in-memory file that
        // was set up in the cache.
2937 let file = sources
2938 .get_or_add_file(
2939 root_path.join("subdir").join("..").join("file.ncl"),
2940 InputFormat::Nickel,
2941 )
2942 .expect("Missed cached file when pulling with relative path");
2943 assert_eq!(CacheOp::Cached(file_id), file);
2944 }
2945
2946 #[test]
2947 fn close_file() {
2948 let mut sources = SourceCache::new();
2949 let path = PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("closed.ncl");
2950 let source_path = SourcePath::Path(path.clone(), InputFormat::Nickel);
2951 sources.add_string(source_path.clone(), "1".to_string());
2952 sources
2953 .close_in_memory_file(path.clone(), InputFormat::Nickel)
2954 .unwrap();
2955 assert_eq!(
2956 sources
2957 .file_ids
2958 .get(&source_path)
2959 .map(|it| it.source)
2960 .unwrap(),
2961 SourceKind::MemoryClosed
2962 );
2963
2964 // Since the closed file should be stale, id_or_new_timestamp_of should not return the
2965 // file ID for the closed file. Since in this case the file doesn't exist on the
2966 // filesystem, it should return an error.
2967 assert!(sources
2968 .id_or_new_timestamp_of(&path, InputFormat::Nickel)
2969 .is_err());
2970 }
2971}