nickel_lang_core/cache.rs
1//! Various caches for artifacts generated across the whole pipeline: source code, parsed
2//! representations, imports data (dependencies and reverse dependencies, etc.)
3//!
4//! In order to manage the complexity of correctly borrowing such structures, where the arena
5//! allocation of ASTs requires usage of self-borrowing structures, the main cache is split in
6//! different subcaches that can be borrowed independently.
7pub use ast_cache::AstCache;
8
9pub use nickel_lang_parser::ast::InputFormat;
10
11use crate::{
12 ast::{
13 self, Ast, AstAlloc, TryConvert,
14 compat::{ToAst, ToMainline},
15 },
16 closurize::Closurize as _,
17 error::{Error, ImportError, ImportErrorKind, ParseError, ParseErrors, TypecheckError},
18 eval::{self, cache::Cache as EvalCache, value::NickelValue},
19 files::{FileId, Files},
20 identifier::LocIdent,
21 metrics::measure_runtime,
22 package::PackageMap,
23 parser::{self, ErrorTolerantParser, ExtendedTerm, lexer::Lexer},
24 position::{PosIdx, PosTable, TermPos},
25 program::FieldPath,
26 serialize::yaml::Listify,
27 stdlib::{self as nickel_stdlib, StdlibModule},
28 term::{self},
29 transform::{self, Wildcards, import_resolution},
30 traverse::TraverseOrder,
31 typ::UnboundTypeVariableError,
32 typecheck::{self, HasApparentType, TypecheckMode, typecheck},
33};
34
35#[cfg(feature = "nix-experimental")]
36use crate::nix_ffi;
37
38use std::{
39 collections::{HashMap, HashSet, hash_map},
40 ffi::{OsStr, OsString},
41 fmt, fs,
42 io::{self, Read},
43 path::{Path, PathBuf},
44 result::Result,
45 sync::Arc,
46 time::SystemTime,
47};
48
49use ouroboros::self_referencing;
50
/// Error when trying to add bindings to the typing context where the given term isn't a record
/// literal.
//
// Derives added so this public error type can be used with `Debug`-requiring APIs
// (`unwrap`/`expect`), copied freely, and compared in tests, consistently with [TermNotFound].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct NotARecord;
54
55/// The term cache stores the parsed values (the runtime representation) of sources.
56#[derive(Debug, Clone)]
57pub struct TermCache {
58 /// The term table stores parsed terms corresponding to the entries of the file database.
59 terms: HashMap<FileId, TermEntry>,
60}
61
/// Error returned by [TermCache] operations when the requested file id has no entry in the term
/// cache.
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
pub struct TermNotFound;
64
impl TermCache {
    /// Creates a new, empty term cache.
    pub fn new() -> Self {
        TermCache {
            terms: HashMap::new(),
        }
    }

    /// Updates the state of an entry and returns the previous state, or an error if the entry
    /// isn't in the cache.
    pub fn update_state(
        &mut self,
        file_id: FileId,
        new: TermEntryState,
    ) -> Result<TermEntryState, TermNotFound> {
        self.terms
            .get_mut(&file_id)
            .map(|TermEntry { state, .. }| std::mem::replace(state, new))
            .ok_or(TermNotFound)
    }

    /// Applies program transformations, except for import resolution, which is implemented in a
    /// separate phase. Imports of the entry are transformed recursively as well.
    fn transform(
        &mut self,
        pos_table: &mut PosTable,
        wildcards: &WildcardsCache,
        import_data: &ImportData,
        file_id: FileId,
    ) -> Result<CacheOp<()>, TermCacheError<UnboundTypeVariableError>> {
        match self.terms.get(&file_id).map(|entry| entry.state) {
            Some(state) if state >= TermEntryState::Transformed => Ok(CacheOp::Cached(())),
            Some(state) => {
                // If the state is already `Transforming`, we've reached this entry again through
                // a cyclic import chain: skip the work (but still report `Done`) so the recursion
                // terminates.
                if state < TermEntryState::Transforming {
                    let cached_term = self.terms.remove(&file_id).unwrap();
                    let term = transform::transform(
                        pos_table,
                        cached_term.value,
                        wildcards.wildcards.get(&file_id),
                    )?;
                    // Re-insert the entry in the `Transforming` state *before* recursing into
                    // imports, so that import cycles hit the guard above instead of looping.
                    self.insert(
                        file_id,
                        TermEntry {
                            value: term,
                            state: TermEntryState::Transforming,
                            ..cached_term
                        },
                    );

                    let imported: Vec<_> = import_data.imports(file_id).collect();
                    for file_id in imported {
                        self.transform(pos_table, wildcards, import_data, file_id)?;
                    }

                    // unwrap(): we re-inserted the entry after removal and transformation, so it
                    // should be in the cache.
                    let _ = self
                        .update_state(file_id, TermEntryState::Transformed)
                        .unwrap();
                }

                Ok(CacheOp::Done(()))
            }
            None => Err(CacheError::IncompatibleState {
                want: TermEntryState::Populated,
            }),
        }
    }

    /// Retrieves the state of an entry. Returns `None` if the entry is not in the term cache. This
    /// might happen if the file hasn't been parsed, or if the term cache hasn't been filled from
    /// the AST cache yet. The latter is supposed to happen right before program transformations.
    pub fn entry_state(&self, file_id: FileId) -> Option<TermEntryState> {
        self.terms
            .get(&file_id)
            .map(|TermEntry { state, .. }| *state)
    }

    /// Replaces a cache entry by a closurized version of itself. If it contains imports,
    /// closurize them recursively.
    ///
    /// Closurization is not required before evaluation, but it has two benefits:
    ///
    /// - the closurized term uses the evaluation cache, so if it is imported in multiple
    ///   places then they will share a cache
    /// - the eval cache's built-in mechanism for preventing infinite recursion will also
    ///   apply to recursive imports.
    ///
    /// The main disadvantage of closurization is that it makes the resulting runtime
    /// representation less useful. You wouldn't want to closurize before pretty-printing, for
    /// example. This isn't as important these days, since we also have the AST representation at
    /// hand.
    pub fn closurize<C: EvalCache>(
        &mut self,
        cache: &mut C,
        import_data: &ImportData,
        file_id: FileId,
    ) -> Result<CacheOp<()>, TermCacheError<()>> {
        match self.entry_state(file_id) {
            Some(state) if state >= TermEntryState::Closurized => Ok(CacheOp::Cached(())),
            Some(_) => {
                // unwrap(): `entry_state` returned `Some`, so the entry is in the cache.
                let cached_term = self.terms.remove(&file_id).unwrap();
                let term = cached_term.value.closurize(cache, eval::Environment::new());
                // Mark the entry as `Closurized` *before* recursing into imports, so that import
                // cycles hit the `Cached` branch above and the recursion terminates.
                self.insert(
                    file_id,
                    TermEntry {
                        value: term,
                        state: TermEntryState::Closurized,
                        ..cached_term
                    },
                );

                let imported: Vec<_> = import_data.imports(file_id).collect();
                for file_id in imported {
                    self.closurize(cache, import_data, file_id)?;
                }

                Ok(CacheOp::Done(()))
            }
            None => Err(CacheError::IncompatibleState {
                want: TermEntryState::Populated,
            }),
        }
    }

    /// Returns an immutable reference to the whole term cache.
    pub fn terms(&self) -> &HashMap<FileId, TermEntry> {
        &self.terms
    }

    /// Retrieves a fresh clone of a cached term.
    pub fn get_owned(&self, file_id: FileId) -> Option<NickelValue> {
        self.terms
            .get(&file_id)
            .map(|TermEntry { value: term, .. }| term.clone())
    }

    /// Retrieves a reference to a cached term.
    pub fn get(&self, file_id: FileId) -> Option<&NickelValue> {
        self.terms
            .get(&file_id)
            .map(|TermEntry { value: term, .. }| term)
    }

    /// Retrieves the whole entry for a given file id.
    pub fn get_entry(&self, file_id: FileId) -> Option<&TermEntry> {
        self.terms.get(&file_id)
    }

    /// Returns `true` if the term cache contains a term for the given file id.
    pub fn contains(&self, file_id: FileId) -> bool {
        self.terms.contains_key(&file_id)
    }

    /// Inserts a new entry in the cache. Usually, this should be handled by [CacheHub] directly,
    /// but there are some use-cases where it is useful to pre-fill the term cache (typically in
    /// NLS).
    pub fn insert(&mut self, file_id: FileId, entry: TermEntry) {
        self.terms.insert(file_id, entry);
    }
}
224
/// This is a temporary fix for [#2362](https://github.com/tweag/nickel/issues/2362). File paths
/// prefixed with this are treated specially: they can refer to in-memory source. To build an
/// import expression that refers to an in-memory source, append the source name to this prefix and
/// use it as the path: `format!("{IN_MEMORY_SOURCE_PATH_PREFIX}{src_name}")`.
pub const IN_MEMORY_SOURCE_PATH_PREFIX: &str = "%inmem_src%:";
230
/// The source cache handles reading textual data from the file system or other sources and storing
/// it in a [Files] instance.
///
/// While not ideal, we have to make most of the fields public to allow the LSP to perform its own
/// import resolution.
#[derive(Clone)]
pub struct SourceCache {
    /// The content of the program sources plus imports.
    pub files: Files,
    /// Reverse map from file ids to source paths.
    pub file_paths: HashMap<FileId, SourcePath>,
    /// The name-id table, holding file ids stored in the database indexed by source names.
    pub file_ids: HashMap<SourcePath, NameIdEntry>,
    /// Paths where to look for imports, as included by the user through either the CLI argument
    /// `--import-path` or the environment variable `$NICKEL_IMPORT_PATH`.
    pub import_paths: Vec<PathBuf>,
    /// A table mapping FileIds to the package that they belong to.
    ///
    /// Path dependencies have already been canonicalized to absolute paths.
    pub packages: HashMap<FileId, PathBuf>,
    /// The map used to resolve package imports.
    pub package_map: Option<PackageMap>,
}
254
255impl SourceCache {
256 pub fn new() -> Self {
257 let files =
258 Files::new(crate::stdlib::modules().map(|m| (m.file_name().to_owned(), m.content())));
259 SourceCache {
260 files,
261 file_paths: HashMap::new(),
262 file_ids: HashMap::new(),
263 import_paths: Vec::new(),
264 packages: HashMap::new(),
265 package_map: None,
266 }
267 }
268
269 /// Retrieves the name of a source given an id.
270 pub fn name(&self, file_id: FileId) -> &OsStr {
271 self.files.name(file_id)
272 }
273
274 /// Add paths to the import path list, where the resolver is looking for imported files.
275 pub fn add_import_paths<P>(&mut self, paths: impl Iterator<Item = P>)
276 where
277 PathBuf: From<P>,
278 {
279 self.import_paths.extend(paths.map(PathBuf::from));
280 }
281
282 /// Sets the package map to use for package import resolution.
283 pub fn set_package_map(&mut self, map: PackageMap) {
284 self.package_map = Some(map);
285 }
286
287 /// Same as [Self::add_file], but assumes that the path is already normalized and takes the
288 /// timestamp as a parameter.
289 fn add_normalized_file(
290 &mut self,
291 path: PathBuf,
292 format: InputFormat,
293 timestamp: SystemTime,
294 ) -> io::Result<FileId> {
295 let contents = std::fs::read_to_string(&path)?;
296 let file_id = self.files.add(&path, contents);
297
298 self.file_paths
299 .insert(file_id, SourcePath::Path(path.clone(), format));
300 self.file_ids.insert(
301 SourcePath::Path(path, format),
302 NameIdEntry {
303 id: file_id,
304 source: SourceKind::Filesystem(timestamp),
305 },
306 );
307 Ok(file_id)
308 }
309
310 /// Loads a file and adds it to the name-id table.
311 ///
312 /// Uses the normalized path and the *modified at* timestamp as the name-id table entry.
313 /// Overrides any existing entry with the same name.
314 pub fn add_file(
315 &mut self,
316 path: impl Into<OsString>,
317 format: InputFormat,
318 ) -> io::Result<FileId> {
319 let path = path.into();
320 let timestamp = timestamp(&path)?;
321 let normalized = normalize_path(&path)?;
322 self.add_normalized_file(normalized, format, timestamp)
323 }
324
325 /// Try to retrieve the id of a file from the cache.
326 ///
327 /// If it was not in cache, try to read it and add it as a new entry.
328 ///
329 /// # In memory sources
330 ///
331 /// As a temporary fix for [#2362](https://github.com/tweag/nickel/issues/2362), if a file path
332 /// starts with [IN_MEMORY_SOURCE_PATH_PREFIX], the suffix is looked up un-normalized value
333 /// first, which makes it possible to hit in-memory only sources by importing a path
334 /// `"{SOURCE_PATH_PREFX}{src_name}"`. If it can't be found, it is looked up normally, so that
335 /// it doesn't break strange file names that happen to contain the source path prefix.
336 ///
337 /// It is theoretically possible that if both the source "abc" and the file
338 /// "{IN_MEMORY_SOURCE_PATH_PREFIX}abc" exist, the source is imported instead of the intended
339 /// file. However, given the prefix, it just can't be accidental. As we want to give access to
340 /// in-memory sources in any case, although this can be surprising, I don't see any obvious
341 /// attack scenario here. This fix is also intended to be temporary. If you still need to make
342 /// sure this doesn't happen, one way would be to add some randomness to the name of the
343 /// sources, so that they can't be predicted beforehand.
344 pub fn get_or_add_file(
345 &mut self,
346 path: impl Into<OsString>,
347 format: InputFormat,
348 ) -> io::Result<CacheOp<FileId>> {
349 let path = path.into();
350 let normalized = normalize_path(&path)?;
351
352 // Try to fetch a generated source if the path starts with a hardcoded prefix
353 let generated_entry = path
354 .to_str()
355 .and_then(|p| p.strip_prefix(IN_MEMORY_SOURCE_PATH_PREFIX))
356 .and_then(|src_name| {
357 self.file_ids
358 .get(&SourcePath::Path(src_name.into(), format))
359 });
360
361 if let Some(entry) = generated_entry {
362 return Ok(CacheOp::Cached(entry.id));
363 }
364
365 match self.id_or_new_timestamp_of(normalized.as_ref(), format)? {
366 SourceState::UpToDate(id) => Ok(CacheOp::Cached(id)),
367 SourceState::Stale(timestamp) => self
368 .add_normalized_file(normalized, format, timestamp)
369 .map(CacheOp::Done),
370 }
371 }
372
373 /// Load a source and add it to the name-id table.
374 ///
375 /// Do not check if a source with the same name already exists: if it is the case,
376 /// [Self::add_source] will happily will override the old entry in the name-id table.
377 pub fn add_source<T>(&mut self, source_name: SourcePath, mut source: T) -> io::Result<FileId>
378 where
379 T: Read,
380 {
381 let mut buffer = String::new();
382 source.read_to_string(&mut buffer)?;
383 Ok(self.add_string(source_name, buffer))
384 }
385
386 /// Returns the content of a file.
387 ///
388 /// Panics if the file id is invalid.
389 pub fn source(&self, id: FileId) -> &str {
390 self.files.source(id)
391 }
392
393 /// Returns a cloned `Arc` to the content of the file.
394 ///
395 /// The `Arc` is here for the LSP, where the background evaluation is handled by background
396 /// threads and processes.
397 ///
398 /// Panics if the file id is invalid.
399 pub fn clone_source(&self, id: FileId) -> Arc<str> {
400 self.files.clone_source(id)
401 }
402
403 /// Loads a new source as a string and add it to the name-id table.
404 ///
405 /// Do not check if a source with the same name already exists: if it is the case, this one
406 /// will override the old entry in the name-id table but the old `FileId` will remain valid.
407 pub fn add_string(&mut self, source_name: SourcePath, s: String) -> FileId {
408 let id = self.files.add(source_name.clone(), s);
409
410 self.file_paths.insert(id, source_name.clone());
411 self.file_ids.insert(
412 source_name,
413 NameIdEntry {
414 id,
415 source: SourceKind::Memory,
416 },
417 );
418 id
419 }
420
421 /// Loads a new source as a string, replacing any existing source with the same name.
422 ///
423 /// As opposed to [CacheHub::replace_string], this method doesn't update the other caches. It
424 /// just affects the source cache.
425 pub fn replace_string(&mut self, source_name: SourcePath, s: String) -> FileId {
426 if let Some(file_id) = self.id_of(&source_name) {
427 // The file may have been originally loaded from the filesystem and then
428 // updated by the LSP, so the SourceKind needs to be updated to Memory.
429 self.file_ids.insert(
430 source_name,
431 NameIdEntry {
432 id: file_id,
433 source: SourceKind::Memory,
434 },
435 );
436 self.files.update(file_id, s);
437 file_id
438 } else {
439 // We re-use [Self::add_string] here to properly fill the file_paths and file_ids
440 // tables.
441 self.add_string(source_name, s)
442 }
443 }
444
445 /// Closes a file that has been opened in memory and reloads it from the filesystem.
446 /// Returns the file ID of the replacement file loaded from the filesystem.
447 pub fn close_in_memory_file(
448 &mut self,
449 path: PathBuf,
450 format: InputFormat,
451 ) -> Result<FileCloseResult, FileCloseError> {
452 let entry = self
453 .file_ids
454 .get_mut(&SourcePath::Path(path.clone(), format))
455 .ok_or(FileCloseError::FileIdNotFound)?;
456 match &entry.source {
457 SourceKind::Memory => {
458 let closed_id = entry.id;
459 entry.source = SourceKind::MemoryClosed;
460 let replacement_id = self.get_or_add_file(path, format).map(|op| op.inner());
461 Ok(FileCloseResult {
462 closed_id,
463 replacement_id,
464 })
465 }
466 _ => Err(FileCloseError::FileNotOpen),
467 }
468 }
469
470 /// Retrieves the id of a source given a name.
471 ///
472 /// Note that files added via [Self::add_file] are indexed by their full normalized path (cf
473 /// [normalize_path]).
474 pub fn id_of(&self, name: &SourcePath) -> Option<FileId> {
475 match name {
476 SourcePath::Path(p, fmt) => match self.id_or_new_timestamp_of(p, *fmt).ok()? {
477 SourceState::UpToDate(id) => Some(id),
478 SourceState::Stale(_) => None,
479 },
480 name => Some(self.file_ids.get(name)?.id),
481 }
482 }
483
484 /// Tries to retrieve the id of a cached source.
485 ///
486 /// Only returns `Ok` if the source is up-to-date; if the source is stale, returns
487 /// either the new timestamp of the up-to-date file or the error we encountered when
488 /// trying to read it (which most likely means there was no such file).
489 ///
490 /// The main point of this awkward signature is to minimize I/O operations: if we accessed
491 /// the timestamp, keep it around.
492 fn id_or_new_timestamp_of(&self, name: &Path, format: InputFormat) -> io::Result<SourceState> {
493 match self
494 .file_ids
495 .get(&SourcePath::Path(name.to_owned(), format))
496 {
497 None
498 | Some(NameIdEntry {
499 source: SourceKind::MemoryClosed,
500 ..
501 }) => Ok(SourceState::Stale(timestamp(name)?)),
502 Some(NameIdEntry {
503 id,
504 source: SourceKind::Filesystem(ts),
505 }) => {
506 let new_timestamp = timestamp(name)?;
507 if ts == &new_timestamp {
508 Ok(SourceState::UpToDate(*id))
509 } else {
510 Ok(SourceState::Stale(new_timestamp))
511 }
512 }
513 Some(NameIdEntry {
514 id,
515 source: SourceKind::Memory,
516 }) => Ok(SourceState::UpToDate(*id)),
517 }
518 }
519
520 /// Gets a reference to the underlying files. Required by the WASM REPL error reporting code
521 /// and LSP functions.
522 pub fn files(&self) -> &Files {
523 &self.files
524 }
525
526 /// Parses a Nickel source without querying nor populating other caches.
527 pub fn parse_nickel<'ast>(
528 &self,
529 // We take the allocator explicitly, to make sure `self.asts` is properly initialized
530 // before calling this function, and won't be dropped.
531 alloc: &'ast AstAlloc,
532 file_id: FileId,
533 ) -> Result<Ast<'ast>, ParseErrors> {
534 parse_nickel(alloc, file_id, self.files.source(file_id))
535 }
536
537 /// Parses a source that isn't Nickel code without querying nor populating the other caches. Support
538 /// multiple formats.
539 ///
540 /// The Nickel/non Nickel distinction is a bit artificial at the moment, due to the fact that
541 /// parsing Nickel returns the new [crate::ast::Ast], while parsing other formats
542 /// don't go through the new AST first but directly deserialize to the legacy
543 /// [crate::term::Term] for simplicity and performance reasons.
544 ///
545 /// Once RFC007 is fully implemented, we might clean it up.
546 ///
547 /// # Panic
548 ///
549 /// This function panics if `format` is [InputFormat::Nickel].
550 pub fn parse_other(
551 &self,
552 pos_table: &mut PosTable,
553 file_id: FileId,
554 format: InputFormat,
555 ) -> Result<NickelValue, ParseError> {
556 let whole_span: TermPos = self.files.source_span(file_id).into();
557 let pos_idx = pos_table.push(whole_span);
558
559 let source = self.files.source(file_id);
560
561 match format {
562 InputFormat::Nickel => {
563 // Panicking isn't great, but we expect this to be temporary, until RFC007 is fully
564 // implemented. And this case is an internal bug.
565 panic!("error: trying to parse a Nickel source with parse_other_nocache")
566 }
567 InputFormat::Json => {
568 crate::serialize::yaml::load_json_value(pos_table, source, Some(file_id))
569 }
570 InputFormat::Yaml => crate::serialize::yaml::load_yaml_value(
571 pos_table,
572 source,
573 Some(file_id),
574 Listify::Auto,
575 ),
576 InputFormat::Toml => crate::serialize::toml_deser::from_str(pos_table, source, file_id)
577 .map(|v: NickelValue| v.with_pos_idx(pos_idx))
578 .map_err(|err| ParseError::from_toml(err, file_id)),
579 #[cfg(feature = "nix-experimental")]
580 InputFormat::Nix => {
581 let json = nix_ffi::eval_to_json(source, &self.get_base_dir_for_nix(file_id))
582 .map_err(|e| ParseError::from_nix(e.what(), file_id))?;
583 serde_json::from_str(&json)
584 .map(|v: NickelValue| v.with_pos_idx(pos_idx))
585 .map_err(|err| ParseError::from_serde_json(err, Some((file_id, &self.files))))
586 }
587 InputFormat::Text => Ok(NickelValue::string(source, pos_idx)),
588 }
589 }
590
591 /// Returns true if a particular file id represents a Nickel standard library file, false
592 /// otherwise.
593 pub fn is_stdlib_module(&self, file: FileId) -> bool {
594 self.files.is_stdlib(file)
595 }
596
597 /// Retrieves the file id for a given standard libray module.
598 pub fn get_submodule_file_id(&self, module: StdlibModule) -> Option<FileId> {
599 self.stdlib_modules()
600 .find(|(m, _id)| m == &module)
601 .map(|(_, id)| id)
602 }
603
604 /// Returns the list of file ids corresponding to the standard library modules.
605 pub fn stdlib_modules(&self) -> impl Iterator<Item = (StdlibModule, FileId)> + use<> {
606 let ids = self.files.stdlib_modules();
607 crate::stdlib::modules().into_iter().zip(ids)
608 }
609
610 /// Return the format of a given source. Returns `None` if there is no entry in the source
611 /// cache for `file_id`, or if there is no well-defined input format (e.g. for REPL inputs,
612 /// field assignments, etc.).
613 pub fn input_format(&self, file_id: FileId) -> Option<InputFormat> {
614 self.file_paths
615 .get(&file_id)
616 .and_then(|source| match source {
617 SourcePath::Path(_, input_format) => Some(*input_format),
618 SourcePath::Std(_) => Some(InputFormat::Nickel),
619 SourcePath::Snippet(_)
620 | SourcePath::Query
621 | SourcePath::ReplInput(_)
622 | SourcePath::ReplTypecheck
623 | SourcePath::ReplQuery
624 | SourcePath::CliFieldAssignment
625 | SourcePath::Override(_)
626 | SourcePath::Generated(_) => None,
627 })
628 }
629
630 /// Returns the base path for Nix evaluation, which is the parent directory of the source file
631 /// if any, or the current working directory, or an empty path if we couldn't find any better.
632 #[cfg(feature = "nix-experimental")]
633 fn get_base_dir_for_nix(&self, file_id: FileId) -> PathBuf {
634 let parent_dir = self
635 .file_paths
636 .get(&file_id)
637 .and_then(|source_path| Path::new(<&OsStr>::try_from(source_path).ok()?).parent());
638
639 parent_dir
640 .map(PathBuf::from)
641 .or_else(|| std::env::current_dir().ok())
642 .unwrap_or_default()
643 }
644}
645
/// Stores the mapping of each wildcard id to its inferred type, for each file in the cache.
#[derive(Default, Clone, Debug)]
pub struct WildcardsCache {
    /// Inferred wildcard types, indexed by the file they were inferred in.
    wildcards: HashMap<FileId, Wildcards>,
}
651
impl WildcardsCache {
    /// Creates a new, empty wildcards cache.
    pub fn new() -> Self {
        Self::default()
    }

    /// Returns the wildcard types inferred for the given file, or `None` if there is no entry for
    /// this file.
    pub fn get(&self, file_id: FileId) -> Option<&Wildcards> {
        self.wildcards.get(&file_id)
    }
}
661
/// Metadata about an imported file.
#[derive(Clone, Copy, Debug, Hash, Eq, PartialEq)]
pub struct ImportTarget {
    /// The file id of the imported source.
    pub file_id: FileId,
    /// The input format the source is imported as.
    pub format: InputFormat,
}
668
/// Stores dependencies and reverse dependencies data between sources.
#[derive(Default, Clone)]
pub struct ImportData {
    /// A map containing for each FileId a list of files they import (directly).
    pub imports: HashMap<FileId, HashSet<ImportTarget>>,
    /// A map containing for each FileId a list of files importing them (directly). Note that we
    /// don't need to store the format here, as only Nickel files can import other files. We do
    /// however store the position of the first import expression (the same file can be imported
    /// many times from a given file), for error reporting purposes.
    pub rev_imports: HashMap<FileId, HashMap<FileId, TermPos>>,
}
680
681impl ImportData {
682 pub fn new() -> Self {
683 Self::default()
684 }
685
686 /// Returns the set of files that this file imports.
687 pub fn imports(&self, file: FileId) -> impl Iterator<Item = FileId> + '_ {
688 self.imports
689 .get(&file)
690 .into_iter()
691 .flat_map(|s| s.iter())
692 .map(|tgt| tgt.file_id)
693 }
694
695 /// Returns the set of files that import this file.
696 pub fn rev_imports(&self, file: FileId) -> impl Iterator<Item = FileId> + '_ {
697 self.rev_imports
698 .get(&file)
699 .into_iter()
700 .flat_map(|h| h.keys())
701 .copied()
702 }
703
704 /// Returns the set of files that transitively depend on this file.
705 pub fn transitive_rev_imports(&self, file: FileId) -> HashSet<FileId> {
706 let mut ret = HashSet::new();
707 let mut stack = vec![file];
708
709 while let Some(file) = stack.pop() {
710 for f in self.rev_imports(file) {
711 if ret.insert(f) {
712 stack.push(f);
713 }
714 }
715 }
716
717 ret
718 }
719
720 /// Returns the set of files that this file transitively depends on.
721 pub fn transitive_imports(&self, file: FileId) -> HashSet<FileId> {
722 let mut ret = HashSet::new();
723 let mut stack = vec![file];
724
725 while let Some(file) = stack.pop() {
726 for f in self.imports(file) {
727 if ret.insert(f) {
728 stack.push(f);
729 }
730 }
731 }
732
733 ret
734 }
735
736 /// Returns `true` if those import data are empty.
737 pub fn is_empty(&self) -> bool {
738 self.imports.is_empty() && self.rev_imports.is_empty()
739 }
740}
741
/// The cache hub aggregates the various kind of source-related caches used by Nickel.
///
/// [CacheHub] handles parsing, typechecking and program transformation of sources, as well as
/// caching the corresponding artifacts (text, ASTs, state). This is the central entry point for
/// other modules.
///
/// # RFC007
///
/// As part of the migration to a new AST required by RFC007, as long as we don't have a fully
/// working bytecode virtual machine, the cache needs to keep parsed expressions both as the old
/// representation (dubbed "mainline" or the runtime representation in many places) and as the new
/// AST representation.
pub struct CacheHub {
    /// The cache of terms in the legacy runtime representation.
    pub terms: TermCache,
    /// The cache of source texts and their file ids.
    pub sources: SourceCache,
    /// The cache of parsed ASTs (the new, RFC007 representation).
    pub asts: AstCache,
    /// The inferred types of typing wildcards, per file.
    pub wildcards: WildcardsCache,
    /// Import dependencies and reverse dependencies between sources.
    pub import_data: ImportData,
    #[cfg(debug_assertions)]
    /// Skip loading the stdlib, used for debugging purposes
    pub skip_stdlib: bool,
}
764
765impl CacheHub {
    /// Creates a new cache hub with all subcaches empty (the source cache is pre-populated with
    /// the stdlib sources, as per [SourceCache::new]).
    pub fn new() -> Self {
        CacheHub {
            terms: TermCache::new(),
            sources: SourceCache::new(),
            asts: AstCache::empty(),
            wildcards: WildcardsCache::new(),
            import_data: ImportData::new(),
            #[cfg(debug_assertions)]
            skip_stdlib: false,
        }
    }
777
778 /// Actual implementation of [Self::parse_ast] which doesn't take `self` as a parameter, so that it
779 /// can be reused from other places when we don't have a full [CacheHub] instance at hand.
780 fn parse_ast_impl(
781 asts: &mut AstCache,
782 sources: &mut SourceCache,
783 file_id: FileId,
784 ) -> Result<CacheOp<()>, ParseErrors> {
785 if asts.contains(file_id) {
786 Ok(CacheOp::Cached(()))
787 } else {
788 let _ = asts.parse_nickel(file_id, sources.files.source(file_id))?;
789 Ok(CacheOp::Done(()))
790 }
791 }
792
    /// Parse a REPL input and populate the corresponding entry in the cache.
    ///
    /// The first component of the tuple in the `Ok` case is the identifier of the toplevel let, if
    /// the input is a toplevel let, or `None` if the input is a standard Nickel expression.
    ///
    /// # RFC007
    ///
    /// This method populates both the ast cache and the term cache at once.
    pub fn parse_repl(
        &mut self,
        pos_table: &mut PosTable,
        file_id: FileId,
    ) -> Result<CacheOp<Option<LocIdent>>, ParseErrors> {
        // Since we need the identifier, we always reparse the input. In any case, it doesn't
        // happen that we see the same REPL input twice right now, so caching it is in fact
        // useless. It's just much simpler to reuse the cache infrastructure than to reimplement
        // the whole transformations and import dependencies tracking elsewhere.
        let extd_ast = self
            .asts
            .parse_nickel_repl(file_id, self.sources.files.source(file_id))?;

        let (id, ast) = match extd_ast {
            ExtendedTerm::Term(t) => (None, t),
            ExtendedTerm::ToplevelLet(id, t) => (Some(id), t),
        };

        // Convert the freshly parsed AST to the legacy runtime representation for the term cache.
        let term = measure_runtime!("runtime:ast_conversion", ast.to_mainline(pos_table));

        self.terms.insert(
            file_id,
            TermEntry {
                value: term,
                state: TermEntryState::default(),
                format: InputFormat::Nickel,
            },
        );

        Ok(CacheOp::Done(id))
    }
832
    /// Parses a source and populates the corresponding entry in the AST cache, or does nothing if
    /// the entry has already been parsed. External input formats are currently directly parsed to
    /// the runtime representation, without going through an AST: currently, the format is assumed
    /// to be [InputFormat::Nickel] in this method. See [Self::parse_to_term] for other formats.
    ///
    /// # RFC007
    ///
    /// This method only populates the AST cache. The term cache must be filled separately.
    pub fn parse_to_ast(&mut self, file_id: FileId) -> Result<CacheOp<()>, ParseErrors> {
        Self::parse_ast_impl(&mut self.asts, &mut self.sources, file_id)
    }
844
    /// Parses a source or compiles an AST into the term cache:
    ///
    /// - if the entry is already in the term cache, do nothing.
    /// - if the format is Nickel and there is a corresponding entry in the AST cache, converts the
    ///   parsed AST to a [NickelValue] and put it in the term cache.
    /// - if the format is Nickel but there is no cached AST, or if the format is not Nickel, parse
    ///   the input directly into the term cache.
    ///
    /// Mostly used during ([NickelValue]-based) import resolution.
    pub fn parse_to_term(
        &mut self,
        pos_table: &mut PosTable,
        file_id: FileId,
        format: InputFormat,
    ) -> Result<CacheOp<()>, ParseErrors> {
        if self.terms.contains(file_id) {
            return Ok(CacheOp::Cached(()));
        }

        let term = if let InputFormat::Nickel = format {
            // First try to compile from the AST cache. On failure, the error is discarded and we
            // fall back to reparsing the source from scratch with a throwaway allocator —
            // presumably compilation only fails when there is no cached AST (TODO confirm against
            // `compile`'s error cases).
            match self.compile(pos_table, file_id) {
                Ok(cache_op) => return Ok(cache_op),
                Err(_) => {
                    let alloc = AstAlloc::new();
                    self.sources
                        .parse_nickel(&alloc, file_id)?
                        .to_mainline(pos_table)
                }
            }
        } else {
            // Non-Nickel formats are deserialized directly to the runtime representation.
            self.sources.parse_other(pos_table, file_id, format)?
        };

        self.terms.insert(
            file_id,
            TermEntry {
                value: term,
                state: TermEntryState::default(),
                format,
            },
        );

        Ok(CacheOp::Done(()))
    }
889
    /// Typecheck an entry of the cache and update its state accordingly, or do nothing if the
    /// entry has already been typechecked. Requires that the corresponding source has been parsed.
    /// If the source contains imports, [Self::typecheck] recursively typechecks the imports as
    /// well.
    ///
    /// # RFC007
    ///
    /// During the transition period between the old VM and the new bytecode VM, this method
    /// performs typechecking on the new representation [crate::ast::Ast].
    pub fn typecheck(
        &mut self,
        file_id: FileId,
        initial_mode: TypecheckMode,
    ) -> Result<CacheOp<()>, AstCacheError<TypecheckError>> {
        let (slice, asts) = self.split_asts();
        asts.typecheck(slice, file_id, initial_mode)
    }
907
    /// Returns the apparent type of an entry that has been typechecked, with wildcards
    /// substituted.
    pub fn type_of(
        &mut self,
        file_id: FileId,
    ) -> Result<CacheOp<ast::typ::Type<'_>>, AstCacheError<TypecheckError>> {
        let (slice, asts) = self.split_asts();
        asts.type_of(slice, file_id)
    }
916
    /// Prepares a source for evaluation: parse, typecheck and apply program transformations, if it
    /// was not already done.
    ///
    /// Same as [Self::prepare_eval_only], but with typechecking enabled.
    pub fn prepare(
        &mut self,
        pos_table: &mut PosTable,
        file_id: FileId,
    ) -> Result<CacheOp<()>, Error> {
        self.prepare_impl(pos_table, file_id, true)
    }
926
    /// Prepares a file for evaluation only. Same as [Self::prepare], but doesn't typecheck the
    /// source.
    pub fn prepare_eval_only(
        &mut self,
        pos_table: &mut PosTable,
        file_id: FileId,
    ) -> Result<CacheOp<()>, Error> {
        self.prepare_impl(pos_table, file_id, false)
    }
936
937 /// Common implementation for [Self::prepare] and [Self::prepare_eval_only], which optionally
938 /// skips typechecking. Note that this method doesn't load and prepare the stdlib.
939 fn prepare_impl(
940 &mut self,
941 pos_table: &mut PosTable,
942 file_id: FileId,
943 typecheck: bool,
944 ) -> Result<CacheOp<()>, Error> {
945 let mut result = CacheOp::Cached(());
946
947 let format = self
948 .sources
949 .file_paths
950 .get(&file_id)
951 .and_then(Option::<InputFormat>::from)
952 .unwrap_or_default();
953
954 if let InputFormat::Nickel = format {
955 if let CacheOp::Done(_) = self.parse_to_ast(file_id)? {
956 result = CacheOp::Done(());
957 }
958
959 if typecheck {
960 let (slice, asts) = self.split_asts();
961
962 let typecheck_res = asts
963 .typecheck(slice, file_id, TypecheckMode::Walk)
964 .map_err(|cache_err| {
965 cache_err.unwrap_error(
966 "cache::prepare(): expected source to be parsed before typechecking",
967 )
968 })?;
969
970 if typecheck_res == CacheOp::Done(()) {
971 result = CacheOp::Done(());
972 };
973 }
974 }
975 // Non-Nickel terms are currently not parsed as ASTs, but directly as the runtime
976 // representation. While the imports of the main file will be parsed to terms by the
977 // `compile_and_transform` automatically, we do need to ensure that the main file is in the
978 // term cache if it's an external format, or `compile_and_transform` will complain.
979 else if let CacheOp::Done(_) = self.parse_to_term(pos_table, file_id, format)? {
980 result = CacheOp::Done(());
981 }
982
983 let transform_res =
984 self.compile_and_transform(pos_table, file_id)
985 .map_err(|cache_err| {
986 cache_err.unwrap_error(
987 "cache::prepare(): expected source to be parsed before transformations",
988 )
989 })?;
990
991 if transform_res == CacheOp::Done(()) {
992 result = CacheOp::Done(());
993 };
994
995 Ok(result)
996 }
997
998 /// Prepare an REPL snippet for evaluation: parse, typecheck and apply program transformations,
999 /// if it was not already done. The difference with [Self::prepare] is that this method also
1000 /// accept toplevel binding `let <id> = <value>`.
1001 ///
1002 /// Returns the identifier of the toplevel let, if the input is a toplevel let, or `None` if
1003 /// the input is a standard Nickel expression.
1004 pub fn prepare_repl(
1005 &mut self,
1006 pos_table: &mut PosTable,
1007 file_id: FileId,
1008 ) -> Result<CacheOp<Option<LocIdent>>, Error> {
1009 let mut done = false;
1010
1011 let parsed = self.parse_repl(pos_table, file_id)?;
1012
1013 done = done || matches!(parsed, CacheOp::Done(_));
1014
1015 let id = parsed.inner();
1016
1017 let (slice, asts) = self.split_asts();
1018 let typecheck_res = asts
1019 .typecheck(slice, file_id, TypecheckMode::Walk)
1020 .map_err(|cache_err| {
1021 cache_err.unwrap_error(
1022 "cache::prepare_repl(): expected source to be parsed before typechecking",
1023 )
1024 })?;
1025
1026 if let Some(id) = id {
1027 let (slice, asts) = self.split_asts();
1028 asts
1029 .add_type_binding(
1030 slice,
1031 id,
1032 file_id,
1033 ).expect("cache::prepare_repl(): expected source to be parsed before augmenting the type environment");
1034 }
1035
1036 done = done || matches!(typecheck_res, CacheOp::Done(_));
1037
1038 let transform_res =
1039 self.compile_and_transform(pos_table, file_id)
1040 .map_err(|cache_err| {
1041 cache_err.unwrap_error(
1042 "cache::prepare(): expected source to be parsed before transformations",
1043 )
1044 })?;
1045
1046 done = done || matches!(transform_res, CacheOp::Done(_));
1047
1048 if done {
1049 Ok(CacheOp::Done(id))
1050 } else {
1051 Ok(CacheOp::Cached(id))
1052 }
1053 }
1054
1055 /// Proxy for [TermCache::transform].
1056 fn transform(
1057 &mut self,
1058 pos_table: &mut PosTable,
1059 file_id: FileId,
1060 ) -> Result<CacheOp<()>, TermCacheError<UnboundTypeVariableError>> {
1061 self.terms
1062 .transform(pos_table, &self.wildcards, &self.import_data, file_id)
1063 }
1064
1065 /// Loads and parse the standard library in the AST cache.
1066 ///
1067 /// # RFC007
1068 ///
1069 /// This method doesn't populate the term cache. Use [Self::compile_stdlib] afterwards.
1070 pub fn load_stdlib(&mut self) -> Result<CacheOp<()>, Error> {
1071 let mut ret = CacheOp::Cached(());
1072
1073 for (_, file_id) in self.sources.stdlib_modules() {
1074 if let CacheOp::Done(_) = self.parse_to_ast(file_id)? {
1075 ret = CacheOp::Done(());
1076 }
1077 }
1078
1079 Ok(ret)
1080 }
1081
1082 /// Converts the parsed standard library to the runtime representation.
1083 pub fn compile_stdlib(
1084 &mut self,
1085 pos_table: &mut PosTable,
1086 ) -> Result<CacheOp<()>, AstCacheError<()>> {
1087 let mut ret = CacheOp::Cached(());
1088
1089 for (_, file_id) in self.sources.stdlib_modules() {
1090 let result = self.compile(pos_table, file_id).map_err(|cache_err| {
1091 if let CacheError::IncompatibleState { want } = cache_err {
1092 CacheError::IncompatibleState { want }
1093 } else {
1094 unreachable!("unexpected parse error during the compilation of stdlib")
1095 }
1096 })?;
1097
1098 if let CacheOp::Done(_) = result {
1099 ret = CacheOp::Done(());
1100 }
1101 }
1102
1103 Ok(ret)
1104 }
1105
1106 /// Typechecks the standard library. Currently only used in the test suite.
1107 pub fn typecheck_stdlib(&mut self) -> Result<CacheOp<()>, AstCacheError<TypecheckError>> {
1108 let (slice, asts) = self.split_asts();
1109 asts.typecheck_stdlib(slice)
1110 }
1111
1112 /// Loads, parses, compiles and applies program transformations to the standard library. We
1113 /// don't typecheck for performance reasons: this is done in the test suite.
1114 pub fn prepare_stdlib(&mut self, pos_table: &mut PosTable) -> Result<(), Error> {
1115 #[cfg(debug_assertions)]
1116 if self.skip_stdlib {
1117 return Ok(());
1118 }
1119
1120 self.load_stdlib()?;
1121 // unwrap(): we just loaded the stdlib, so it must be parsed in the cache.
1122 self.compile_stdlib(pos_table).unwrap();
1123
1124 self.sources
1125 .stdlib_modules()
1126 // We need to handle the internals module separately. Each field
1127 // is bound directly in the environment without evaluating it first, so we can't
1128 // tolerate top-level let bindings that would be introduced by `transform`.
1129 .try_for_each(|(_, file_id)| self.transform(pos_table, file_id).map(|_| ()))
1130 .map_err(|cache_err: TermCacheError<UnboundTypeVariableError>| {
1131 Error::ParseErrors(
1132 cache_err
1133 .unwrap_error(
1134 "cache::prepare_stdlib(): unexpected unbound type variable error during stdlib loading",
1135 )
1136 .into(),
1137 )
1138 })?;
1139
1140 Ok(())
1141 }
1142
    /// Applies a custom transform to an input and its imports. [CacheError::IncompatibleState] is returned
    /// if the file has no entry in the term cache yet (i.e. it hasn't been compiled to the
    /// runtime representation).
    ///
    /// If multiple invocations of `custom_transform` are needed, you must supply `transform_id`
    /// with a number higher than that of all previous invocations.
    pub fn custom_transform<E>(
        &mut self,
        file_id: FileId,
        transform_id: usize,
        f: &mut impl FnMut(&mut CacheHub, NickelValue) -> Result<NickelValue, E>,
    ) -> Result<(), TermCacheError<E>> {
        match self.terms.entry_state(file_id) {
            None => Err(CacheError::IncompatibleState {
                want: TermEntryState::Populated,
            }),
            Some(state) => {
                if state.needs_custom_transform(transform_id) {
                    // We temporarily remove the entry from the cache, because `f` takes a mutable
                    // borrow of the whole `CacheHub` and may look at (or transform) other
                    // entries. It's re-inserted right after with the transformed value.
                    let cached_term = self.terms.terms.remove(&file_id).unwrap();
                    let term = f(self, cached_term.value)?;
                    self.terms.insert(
                        file_id,
                        TermEntry {
                            value: term,
                            state: TermEntryState::Transforming,
                            ..cached_term
                        },
                    );

                    // Collect first: we can't hold a borrow of `self.import_data` across the
                    // recursive calls below, which borrow `self` mutably.
                    let imported: Vec<_> = self.import_data.imports(file_id).collect();
                    for file_id in imported {
                        self.custom_transform(file_id, transform_id, f)?;
                    }

                    // TODO: We're setting the state back to whatever it was.
                    // unwrap(): we inserted the term just above
                    let _ = self
                        .terms
                        .update_state(file_id, TermEntryState::CustomTransformed { transform_id })
                        .unwrap();
                }

                Ok(())
            }
        }
    }
1188
    /// Resolves every import of a term entry of the cache, and updates its state accordingly, or
    /// does nothing if the imports of the entry have already been resolved or if they aren't
    /// Nickel inputs. Requires that the corresponding source has been parsed.
    ///
    /// If resolved imports contain imports themselves, resolve them recursively. Returns a tuple
    /// of vectors, where the first component is the imports that were transitively resolved, and
    /// the second component is the errors it encountered while resolving imports in `file_id`,
    /// respectively. Imports that were already resolved before are not included in the first
    /// component: this return value is currently used by the LSP to re-run code analysis on new
    /// files/modified files.
    ///
    /// The resolved imports are ordered by a pre-order depth-first-search. In particular, earlier
    /// elements in the returned list might import later elements but -- unless there are cyclic
    /// imports -- later elements do not import earlier elements.
    ///
    /// It only accumulates errors if the cache is in error tolerant mode, otherwise it returns an
    /// `Err(..)` containing a `CacheError`.
    ///
    /// # RFC007
    ///
    /// This method is still needed only because the evaluator can't handle un-resolved imports,
    /// so we need to replace them by resolved imports. However, actual import resolution (loading
    /// and parsing files for the first time) is now driven by typechecking directly.
    pub fn resolve_imports(
        &mut self,
        pos_table: &mut PosTable,
        file_id: FileId,
    ) -> Result<CacheOp<Vec<FileId>>, TermCacheError<ImportError>> {
        let entry = self.terms.terms.get(&file_id);

        match entry {
            // A Nickel entry whose imports haven't been resolved yet.
            Some(TermEntry {
                state,
                value: term,
                format: InputFormat::Nickel,
            }) if *state < TermEntryState::ImportsResolving => {
                let term = term.clone();

                let import_resolution::strict::ResolveResult {
                    transformed_term,
                    resolved_ids: pending,
                } = import_resolution::strict::resolve_imports(pos_table, term, self)?;

                // unwrap(): the entry matched `Some(..)` at the beginning of this branch, and
                // `resolve_imports` above doesn't remove anything from `self.terms`, so the entry
                // must still be present.
                let cached_term = self.terms.terms.get_mut(&file_id).unwrap();
                cached_term.value = transformed_term;
                cached_term.state = TermEntryState::ImportsResolving;

                let mut done = Vec::new();

                // Transitively resolve the imports, and accumulate the ids of the resolved
                // files along the way.
                for id in pending {
                    if let CacheOp::Done(mut done_local) = self.resolve_imports(pos_table, id)? {
                        done.push(id);
                        done.append(&mut done_local)
                    }
                }

                // unwrap(): if we are in this branch, the term is present in the cache
                let _ = self
                    .terms
                    .update_state(file_id, TermEntryState::ImportsResolved)
                    .unwrap();

                Ok(CacheOp::Done(done))
            }
            // There's no import to resolve for non-Nickel inputs. We still update the state.
            Some(TermEntry { state, .. }) if *state < TermEntryState::ImportsResolving => {
                // unwrap(): if we are in this branch, the term is present in the cache
                let _ = self
                    .terms
                    .update_state(file_id, TermEntryState::ImportsResolved)
                    .unwrap();
                Ok(CacheOp::Cached(Vec::new()))
            }
            // [^transitory_entry_state]
            //
            // This case is triggered by a cyclic import. The entry is already
            // being treated by an ongoing call to `resolve_import` higher up in
            // the call chain, so we don't do anything here.
            //
            // Note that in some cases, this intermediate state can be observed by an
            // external caller: if a first call to `resolve_imports` fails in the middle of
            // resolving the transitive imports, the end state of the entry is
            // `ImportsResolving`. Subsequent calls to `resolve_imports` will succeed, but
            // won't change the state to `EntryState::ImportsResolved` (and for a good
            // reason: we wouldn't even know what are the pending imports to resolve). The
            // Nickel pipeline should however fail if `resolve_imports` failed at some
            // point, anyway.
            Some(TermEntry {
                state: TermEntryState::ImportsResolving,
                ..
            }) => Ok(CacheOp::Done(Vec::new())),
            // >= EntryState::ImportsResolved
            Some(_) => Ok(CacheOp::Cached(Vec::new())),
            // The source hasn't been compiled to the term cache yet.
            None => Err(CacheError::IncompatibleState {
                want: TermEntryState::Populated,
            }),
        }
    }
1292
1293 /// Generate the initial evaluation environment from the list of `file_ids` corresponding to the
1294 /// standard library parts.
1295 pub fn mk_eval_env<EC: EvalCache>(&self, eval_cache: &mut EC) -> eval::Environment {
1296 let mut eval_env = eval::Environment::new();
1297
1298 for (module, file_id) in self.sources.stdlib_modules() {
1299 // The internals module needs special treatment: it's required to be a record
1300 // literal, and its bindings are added directly to the environment
1301 if let nickel_stdlib::StdlibModule::Internals = module {
1302 let result = eval::env_add_record(
1303 eval_cache,
1304 &mut eval_env,
1305 self.terms
1306 .get_owned(file_id)
1307 .expect("cache::mk_eval_env(): can't build environment, stdlib not parsed")
1308 .into(),
1309 );
1310 if let Err(eval::EnvBuildError::NotARecord(rt)) = result {
1311 panic!(
1312 "cache::Caches::mk_eval_env(): \
1313 expected the stdlib module {} to be a record, got {:?}",
1314 self.sources.name(file_id).to_string_lossy().as_ref(),
1315 rt
1316 )
1317 }
1318 } else {
1319 eval::env_add(
1320 eval_cache,
1321 &mut eval_env,
1322 module.name().into(),
1323 self.terms.get_owned(file_id).expect(
1324 "cache::Caches::mk_eval_env(): can't build environment, stdlib not parsed",
1325 ),
1326 eval::Environment::new(),
1327 );
1328 }
1329 }
1330
1331 eval_env
1332 }
1333
1334 /// Loads a new source as a string, replacing any existing source with the same name.
1335 ///
1336 /// If there was a previous source with the same name, its `FileId` is reused and the cached
1337 /// term is deleted.
1338 ///
1339 /// Used to store intermediate short-lived generated snippets that needs to have a
1340 /// corresponding `FileId`, such as when querying or reporting errors.
1341 pub fn replace_string(&mut self, source_name: SourcePath, s: String) -> FileId {
1342 if let Some(file_id) = self.sources.id_of(&source_name) {
1343 self.sources.files.update(file_id, s);
1344 self.asts.remove(file_id);
1345 self.terms.terms.remove(&file_id);
1346 file_id
1347 } else {
1348 let file_id = self.sources.files.add(source_name.clone(), s);
1349 self.sources.file_paths.insert(file_id, source_name.clone());
1350 self.sources.file_ids.insert(
1351 source_name,
1352 NameIdEntry {
1353 id: file_id,
1354 source: SourceKind::Memory,
1355 },
1356 );
1357 file_id
1358 }
1359 }
1360
1361 pub fn closurize<EC: EvalCache>(
1362 &mut self,
1363 eval_cache: &mut EC,
1364 file_id: FileId,
1365 ) -> Result<CacheOp<()>, TermCacheError<()>> {
1366 self.terms.closurize(eval_cache, &self.import_data, file_id)
1367 }
1368
1369 /// Add the bindings of a record to the REPL type environment. Ignore fields whose name are
1370 /// defined through interpolation.
1371 pub fn add_repl_bindings(
1372 &mut self,
1373 pos_table: &PosTable,
1374 term: &NickelValue,
1375 ) -> Result<(), NotARecord> {
1376 let (slice, asts) = self.split_asts();
1377 asts.add_type_bindings(pos_table, slice, term)
1378 }
1379
    /// Converts an AST and all of its transitive dependencies to the runtime representation,
    /// populating the term cache. `file_id` and any of its Nickel dependencies must be present in
    /// the AST cache, or [CacheError::IncompatibleState] is returned. Non-Nickel dependencies,
    /// however, are instead parsed directly into the term cache.
    ///
    /// For entries that have been typechecked, the wildcard cache will be populated as well
    /// (converting from `ast::typ::Type` to the runtime representation).
    ///
    /// "Compile" is anticipating a bit on RFC007, although it is a lowering of the AST
    /// representation to the runtime representation.
    ///
    /// Compilation doesn't have a proper state associated, and thus should always be coupled with
    /// program transformations through [Self::compile_and_transform]. It should preferably not be
    /// observable as an atomic transition, although as far as I can tell, this shouldn't cause
    /// major troubles to do so.
    pub fn compile(
        &mut self,
        pos_table: &mut PosTable,
        main_id: FileId,
    ) -> Result<CacheOp<()>, AstCacheError<ImportError>> {
        // Already compiled: nothing to do.
        if self.terms.contains(main_id) {
            return Ok(CacheOp::Cached(()));
        }

        // We set the format of the main `file_id` to `Nickel`, even if it is not, to require its
        // presence in either the term cache or the ast cache.
        let mut work_stack = vec![ImportTarget {
            file_id: main_id,
            format: InputFormat::default(),
        }];

        // Depth-first traversal over the import graph, driven by `work_stack`. Already compiled
        // entries are skipped, which also guarantees termination on cyclic imports.
        while let Some(ImportTarget { file_id, format }) = work_stack.pop() {
            if self.terms.contains(file_id) {
                continue;
            }

            let entry = if let InputFormat::Nickel = format {
                // Nickel files must already be parsed in the AST cache.
                let ast_entry =
                    self.asts
                        .get_entry(file_id)
                        .ok_or(CacheError::IncompatibleState {
                            want: AstEntryState::Parsed,
                        })?;

                TermEntry {
                    value: ast_entry.ast.to_mainline(pos_table),
                    format: ast_entry.format,
                    state: TermEntryState::default(),
                }
            } else {
                // We want to maintain the same error message as before the introduction of the two
                // distinct representations, and their processing in two stages (first Nickel files that
                // have an AST, and then others before evaluation).
                //
                // If we find a non-Nickel file here that needs to be parsed, it's because it's
                // been imported from somewhere else. The error used to be an import error, which
                // includes the location of the importing expression. We thus raise an import error
                // here, in case of failure.
                //
                // NOTE(review): the position lookup below uses `main_id` as the importer, even
                // though the direct importer might be an intermediate file — confirm this is
                // intended (it falls back to a default position otherwise).
                let term = self
                    .sources
                    .parse_other(pos_table, file_id, format)
                    .map_err(|parse_err| {
                        CacheError::Error(Box::new(ImportErrorKind::ParseErrors(
                            parse_err.into(),
                            self.import_data
                                .rev_imports
                                .get(&file_id)
                                .and_then(|map| map.get(&main_id))
                                .copied()
                                .unwrap_or_default(),
                        )))
                    })?;

                TermEntry {
                    value: term,
                    format,
                    state: TermEntryState::default(),
                }
            };

            self.terms.insert(file_id, entry);
            // Lower the wildcard types gathered during typechecking, if any, to the runtime
            // representation. Entries without wildcards get an empty list.
            self.wildcards.wildcards.insert(
                file_id,
                self.asts
                    .get_wildcards(file_id)
                    .map(|ws| ws.iter())
                    .unwrap_or_default()
                    .map(|ty| ty.to_mainline(pos_table))
                    .collect(),
            );

            // Schedule the direct imports of this file for compilation.
            work_stack.extend(
                self.import_data
                    .imports
                    .get(&file_id)
                    .into_iter()
                    .flat_map(|set| set.iter()),
            )
        }

        Ok(CacheOp::Done(()))
    }
1482
1483 /// Converts an AST entry and all of its transitive dependencies to the runtime representation
1484 /// (compile), populating the term cache. Applies both import resolution and other program
1485 /// transformations on the resulting terms.
1486 pub fn compile_and_transform(
1487 &mut self,
1488 pos_table: &mut PosTable,
1489 file_id: FileId,
1490 ) -> Result<CacheOp<()>, AstCacheError<Error>> {
1491 let mut done = false;
1492
1493 done = matches!(
1494 self.compile(pos_table, file_id)
1495 .map_err(|cache_err| cache_err.map_err(Error::ImportError))?,
1496 CacheOp::Done(_)
1497 ) || done;
1498
1499 let imports = self
1500 .resolve_imports(pos_table, file_id)
1501 // force_cast(): since we compiled `file_id`, the term cache must be populated, and
1502 // thus `resolve_imports` should never throw `CacheError::IncompatibleState`.
1503 .map_err(|cache_err| cache_err.map_err(Error::ImportError).force_cast())?;
1504 done = matches!(imports, CacheOp::Done(_)) || done;
1505
1506 let transform = self
1507 .terms
1508 .transform(pos_table, &self.wildcards, &self.import_data, file_id)
1509 // force_cast(): since we compiled `file_id`, the term cache must be populated, and
1510 // thus `resolve_imports` should never throw `CacheError::IncompatibleState`.
1511 .map_err(|cache_err| {
1512 cache_err
1513 .map_err(|uvar_err| Error::ParseErrors(ParseErrors::from(uvar_err)))
1514 .force_cast()
1515 })?;
1516 done = matches!(transform, CacheOp::Done(_)) || done;
1517
1518 Ok(if done {
1519 CacheOp::Done(())
1520 } else {
1521 CacheOp::Cached(())
1522 })
1523 }
1524
1525 /// Creates a partial copy of this cache for evaluation purposes only. In particular, we don't
1526 /// copy anything related to arena-allocated ASTs. However, source files, imports data and
1527 /// terms are copied over, which is useful to make new evaluation caches cheaply, typically for
1528 /// NLS and benches.
1529 pub fn clone_for_eval(&self) -> Self {
1530 Self {
1531 terms: self.terms.clone(),
1532 sources: self.sources.clone(),
1533 asts: AstCache::empty(),
1534 wildcards: self.wildcards.clone(),
1535 import_data: self.import_data.clone(),
1536 #[cfg(debug_assertions)]
1537 skip_stdlib: self.skip_stdlib,
1538 }
1539 }
1540
1541 /// Split a mutable borrow to self into a mutable borrow of the AST cache and a mutable borrow
1542 /// of the rest.
1543 pub fn split_asts(&mut self) -> (CacheHubView<'_>, &mut AstCache) {
1544 (
1545 CacheHubView {
1546 terms: &mut self.terms,
1547 sources: &mut self.sources,
1548 wildcards: &mut self.wildcards,
1549 import_data: &mut self.import_data,
1550 #[cfg(debug_assertions)]
1551 skip_stdlib: self.skip_stdlib,
1552 },
1553 &mut self.asts,
1554 )
1555 }
1556
    /// Returns the input format of `file_id`, if known. See [SourceCache::input_format].
    pub fn input_format(&self, file_id: FileId) -> Option<InputFormat> {
        self.sources.input_format(file_id)
    }
1561}
1562
/// Because ASTs are arena-allocated, the self-referential [ast_cache::AstCache] which holds both
/// the arena and references to this arena often needs special treatment, if we want to make the
/// borrow checker happy. The following structure is basically a view of "everything but the ast
/// cache" into [CacheHub], so that we can separate and pack all the rest in a single structure,
/// making the signature of many [ast_cache::AstCache] methods much lighter.
pub struct CacheHubView<'cache> {
    /// The term (runtime representation) cache.
    terms: &'cache mut TermCache,
    /// The source file cache.
    sources: &'cache mut SourceCache,
    /// The wildcard types cache.
    wildcards: &'cache mut WildcardsCache,
    /// Imports and reverse imports data.
    import_data: &'cache mut ImportData,
    #[cfg(debug_assertions)]
    /// Skip loading the stdlib, used for debugging purposes.
    skip_stdlib: bool,
}
1577
1578impl CacheHubView<'_> {
1579 /// Make a reborrow of this slice.
1580 pub fn reborrow(&mut self) -> CacheHubView<'_> {
1581 CacheHubView {
1582 terms: self.terms,
1583 sources: self.sources,
1584 wildcards: self.wildcards,
1585 import_data: self.import_data,
1586 #[cfg(debug_assertions)]
1587 skip_stdlib: self.skip_stdlib,
1588 }
1589 }
1590}
1591
/// An entry in the term cache. Stores the parsed term together with metadata and state.
#[derive(Debug, Clone, PartialEq)]
pub struct TermEntry {
    /// The term in the runtime representation.
    pub value: NickelValue,
    /// How far this entry has progressed in the processing pipeline (import resolution,
    /// transformations, closurization).
    pub state: TermEntryState,
    /// The format the source was parsed from.
    pub format: InputFormat,
}
1599
/// An entry in the AST cache. Stores the parsed term together with metadata and state.
#[derive(Debug, Clone, PartialEq)]
pub struct AstEntry<'ast> {
    /// The arena-allocated AST.
    pub ast: &'ast Ast<'ast>,
    /// How far this entry has progressed in the processing pipeline (typechecking).
    pub state: AstEntryState,
    /// The format the source was parsed from.
    pub format: InputFormat,
}
1607
1608impl<'ast> AstEntry<'ast> {
1609 /// Creates a new entry with default metadata.
1610 pub fn new(ast: &'ast Ast<'ast>) -> Self {
1611 AstEntry {
1612 ast,
1613 state: AstEntryState::default(),
1614 format: InputFormat::default(),
1615 }
1616 }
1617}
1618
/// Inputs can be read from the filesystem or from in-memory buffers (which come, e.g., from
/// the REPL, the standard library, or the language server).
///
/// Inputs read from the filesystem get auto-refreshed: if we try to access them again and
/// the on-disk file has changed, we read it again. Inputs read from in-memory buffers
/// are not auto-refreshed. If an in-memory buffer has a path that also exists in the
/// filesystem, we will not even check that file to see if it has changed.
///
/// An input that was open as an in-memory file may be closed, namely when the file is closed
/// or deleted from an editor using the LSP. In this case, the file will be read from the
/// filesystem again instead of using the in-memory value. Closing a file only makes sense in the
/// case that the [SourcePath] refers to a path on the filesystem. Other types of in-memory files,
/// like the standard library, cannot be closed.
#[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Copy, Clone)]
enum SourceKind {
    /// Read from the filesystem. Carries the modification timestamp of the file at the time it
    /// was read, used to detect staleness.
    Filesystem(SystemTime),
    /// An in-memory buffer (REPL snippet, stdlib, LSP document, ...).
    Memory,
    /// An in-memory buffer that has been closed; subsequent reads go through the filesystem.
    MemoryClosed,
}
1638
/// The errors that can occur while closing an in-memory file.
#[derive(Debug, Clone)]
pub enum FileCloseError {
    /// The file was not closed because no mapping of the source path to a [FileId] could be
    /// found.
    FileIdNotFound,
    /// A file with the given path was found, but it was not open in memory.
    FileNotOpen,
}
1648
1649impl fmt::Display for FileCloseError {
1650 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1651 match &self {
1652 FileCloseError::FileIdNotFound => {
1653 write!(
1654 f,
1655 "No file ID could be found for the file path to be closed."
1656 )
1657 }
1658 FileCloseError::FileNotOpen => {
1659 write!(f, "Attempted to close a file that was not open in-memory.")
1660 }
1661 }
1662 }
1663}
1664
// `Display` and `Debug` are implemented for `FileCloseError`, so the default `Error` methods
// suffice.
impl std::error::Error for FileCloseError {}
1666
/// Contains information about the closed in-memory file and its replacement from the filesystem
/// in the case that an in-memory file was closed successfully.
pub struct FileCloseResult {
    /// The [FileId] of the in-memory file that was closed.
    pub closed_id: FileId,
    /// The [FileId] of the file loaded from the filesystem with the same path as the closed
    /// file, or an error indicating why the file could not be opened.
    ///
    /// An error would be expected here in the case that the file was deleted, which would
    /// also send a close file notification to the LSP.
    pub replacement_id: Result<FileId, io::Error>,
}
1678
/// Cache entries for sources.
///
/// A source can be either a snippet input by the user, in which case it is only identified by its
/// name in the name-id table, and a unique `FileId`. On the other hand, different versions of the
/// same file can coexist during the same session of the REPL. For this reason, an entry of the
/// name-id table of a file also stores the *modified at* timestamp, such that if a file is
/// imported or loaded again and has been modified in between, the entry is invalidated, the
/// content is loaded again and a new `FileId` is generated.
///
/// Note that in that case, invalidation just means that the `FileId` of a previous version is not
/// accessible anymore in the name-id table. However, terms that contain non evaluated imports or
/// source locations referring to a previous version are still able to access the corresponding
/// source or term, which are kept respectively in `files` and `cache`, by using the corresponding
/// `FileId`.
#[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Copy, Clone)]
pub struct NameIdEntry {
    /// The id of the current version of this source.
    id: FileId,
    /// Where the source was read from (filesystem or in-memory).
    source: SourceKind,
}
1697
/// The state of an entry of the term cache.
///
/// # Imports
///
/// Usually, when applying a procedure to a term entry (e.g. program transformations), we process
/// all of its transitive imports as well. We start by processing the entry, updating the state to
/// `XXXing` (ex: `Typechecking`) upon success. Only when all the imports have been successfully
/// processed, the state is updated to `XXXed` (ex: `Typechecked`).
#[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Copy, Clone, Default)]
pub enum TermEntryState {
    /// The initial state. The term is in the cache but hasn't been processed further yet.
    #[default]
    Populated,
    /// A custom transformation of the entry (through `Program::custom_transform`) is underway.
    CustomTransforming,
    /// This entry has completed custom transformations of this ID and lower.
    CustomTransformed { transform_id: usize },
    /// The imports of the entry have been resolved, and the imports of its (transitive) imports
    /// are being resolved.
    ImportsResolving,
    /// The imports of the entry and its transitive dependencies have been resolved.
    ImportsResolved,
    /// The entry has been transformed, and its (transitive) imports are being transformed.
    Transforming,
    /// The entry and its transitive imports have been transformed.
    Transformed,
    /// The entry has been closurized.
    Closurized,
}
1727
1728impl TermEntryState {
1729 fn needs_custom_transform(&self, transform_id: usize) -> bool {
1730 if let TermEntryState::CustomTransformed {
1731 transform_id: done_transform_id,
1732 } = self
1733 {
1734 transform_id > *done_transform_id
1735 } else {
1736 *self < TermEntryState::CustomTransforming
1737 }
1738 }
1739}
1740
/// The state of an entry in the AST cache. Equivalent of [TermEntryState] but for ASTs.
#[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Copy, Clone, Default)]
pub enum AstEntryState {
    /// The initial state. The AST is in the cache but hasn't been processed further yet.
    #[default]
    Parsed,
    /// The entry has been typechecked, and its (transitive) imports are being typechecked.
    Typechecking,
    /// The entry and its transitive imports have been typechecked.
    Typechecked,
}
1752
/// The result of a cache operation, such as parsing, typechecking, etc. which can either have
/// performed actual work, or have done nothing if the corresponding entry was already at a later
/// stage.
#[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Copy, Clone)]
pub enum CacheOp<T> {
    /// The operation performed actual work to produce this value.
    Done(T),
    /// The value was already available at this stage; no work was performed.
    Cached(T),
}
1761
1762impl<T> CacheOp<T> {
1763 pub fn inner(self: CacheOp<T>) -> T {
1764 match self {
1765 CacheOp::Done(t) | CacheOp::Cached(t) => t,
1766 }
1767 }
1768}
1769
/// Wrapper around other errors to indicate that typechecking or applying program transformations
/// failed because the source has not been parsed yet.
///
/// # Type parameters
///
/// - `E`: the underlying, wrapped error type
/// - `S`: the entry state, whether [TermEntryState] or [AstEntryState] in practice.
#[derive(Eq, PartialEq, Debug, Clone)]
pub enum CacheError<E, S> {
    /// An actual error from the underlying operation.
    Error(E),
    /// The state of the entry in the cache is incompatible with the requested operation.
    IncompatibleState {
        /// The minimal state the entry needs to be in for the operation to succeed.
        want: S,
    },
}
1785
/// A [CacheError] whose state parameter is the AST cache's [AstEntryState].
pub type AstCacheError<E> = CacheError<E, AstEntryState>;
/// A [CacheError] whose state parameter is the term cache's [TermEntryState].
pub type TermCacheError<E> = CacheError<E, TermEntryState>;
1788
1789impl<E, S> From<E> for CacheError<E, S> {
1790 fn from(e: E) -> Self {
1791 CacheError::Error(e)
1792 }
1793}
1794
1795impl<E, S> CacheError<E, S> {
1796 #[track_caller]
1797 pub fn unwrap_error(self, msg: &str) -> E {
1798 match self {
1799 CacheError::Error(err) => err,
1800 CacheError::IncompatibleState { .. } => panic!("{}", msg),
1801 }
1802 }
1803
1804 pub fn map_err<O>(self, f: impl FnOnce(E) -> O) -> CacheError<O, S> {
1805 match self {
1806 CacheError::Error(e) => CacheError::Error(f(e)),
1807 CacheError::IncompatibleState { want } => CacheError::IncompatibleState { want },
1808 }
1809 }
1810
1811 /// Assuming that `self` is of the form `CacheError::Error(e)`, cast the error type to another
1812 /// arbitrary state type `T`.
1813 ///
1814 /// # Panic
1815 ///
1816 /// This method panics if `self` is [CacheError::IncompatibleState].
1817 #[track_caller]
1818 pub fn force_cast<T>(self) -> CacheError<E, T> {
1819 match self {
1820 CacheError::Error(e) => CacheError::Error(e),
1821 CacheError::IncompatibleState { want: _ } => panic!(),
1822 }
1823 }
1824}
1825
/// Input data usually comes from files on the file system, but there are also lots of cases where
/// we want to synthesize other kinds of inputs.
///
/// Note that a [SourcePath] does not uniquely identify a cached input:
///
/// - Some functions (like [SourceCache::add_file]) add a new cached input unconditionally.
/// - [`SourceCache::get_or_add_file`] will add a new cached input at the same `SourcePath` if the file
///   on disk was updated.
///
/// The equality checking of `SourcePath` only affects [SourceCache::replace_string], which
/// overwrites any previous cached input with the same `SourcePath`.
#[derive(Debug, PartialEq, Eq, Hash, Clone)]
pub enum SourcePath {
    /// A file at the given path.
    ///
    /// Note that this does not need to be a real file on the filesystem: it could still be loaded
    /// from memory by, e.g., [`SourceCache::add_string`].
    ///
    /// This is the only `SourcePath` variant that can be resolved as the target of an import
    /// statement.
    Path(PathBuf, InputFormat),
    /// A subrange of a file at the given path.
    ///
    /// This is used by NLS to analyze small parts of files that don't fully parse. The original
    /// file path is preserved, because it's needed for resolving imports.
    Snippet(PathBuf),
    /// A module of the standard library.
    Std(StdlibModule),
    /// A synthesized input for a query.
    Query,
    /// The nth input typed at the REPL.
    ReplInput(usize),
    /// A synthesized input used by the REPL for typechecking.
    ReplTypecheck,
    /// A synthesized input used by the REPL for a query.
    ReplQuery,
    /// A field assignment given on the CLI.
    CliFieldAssignment,
    /// A value overriding the field at the given path.
    Override(FieldPath),
    /// A generated source, with a free-form description of its origin.
    Generated(String),
}
1861
1862impl<'a> TryFrom<&'a SourcePath> for &'a OsStr {
1863 type Error = ();
1864
1865 fn try_from(value: &'a SourcePath) -> Result<Self, Self::Error> {
1866 match value {
1867 SourcePath::Path(p, _) | SourcePath::Snippet(p) => Ok(p.as_os_str()),
1868 _ => Err(()),
1869 }
1870 }
1871}
1872
1873impl From<&SourcePath> for Option<InputFormat> {
1874 fn from(source_path: &SourcePath) -> Option<InputFormat> {
1875 if let SourcePath::Path(_p, fmt) = source_path {
1876 Some(*fmt)
1877 } else {
1878 None
1879 }
1880 }
1881}
1882
1883// [`Files`] needs to have an OsString for each file, so we synthesize names even for sources that
1884// don't have them. They don't need to be unique; they're just used for diagnostics.
1885impl From<SourcePath> for OsString {
1886 fn from(source_path: SourcePath) -> Self {
1887 match source_path {
1888 SourcePath::Path(p, _) | SourcePath::Snippet(p) => p.into(),
1889 SourcePath::Std(StdlibModule::Std) => "<stdlib/std.ncl>".into(),
1890 SourcePath::Std(StdlibModule::Internals) => "<stdlib/internals.ncl>".into(),
1891 SourcePath::Query => "<query>".into(),
1892 SourcePath::ReplInput(idx) => format!("<repl-input-{idx}>").into(),
1893 SourcePath::ReplTypecheck => "<repl-typecheck>".into(),
1894 SourcePath::ReplQuery => "<repl-query>".into(),
1895 SourcePath::CliFieldAssignment => "<cli-assignment>".into(),
1896 SourcePath::Override(path) => format!("<override {path}>",).into(),
1897 SourcePath::Generated(description) => format!("<generated {description}>").into(),
1898 }
1899 }
1900}
1901
/// Return status indicating if an import has been resolved from a file (first encounter), or was
/// retrieved from the cache.
///
/// See [ImportResolver::resolve].
#[derive(Debug, PartialEq, Eq)]
pub enum ResolvedTerm {
    /// The import was resolved for the first time by loading a file.
    FromFile {
        path: PathBuf, /* the loaded path */
    },
    /// The import had already been resolved and was served from the cache.
    FromCache,
}
1913
/// Freshness of a cached source with respect to the on-disk file it was loaded from.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum SourceState {
    /// The cached source with this id is up to date.
    UpToDate(FileId),
    /// The source is stale because it came from a file on disk that has since been updated. The
    /// data is the timestamp of the new version of the file.
    Stale(SystemTime),
}
1921
/// Abstract the access to imported files and the import cache. Used by the evaluator and at the
/// [import resolution](crate::transform::import_resolution) phase.
///
/// The standard implementation uses 2 caches, the file cache for raw contents and the term cache
/// for parsed contents, mirroring the 2 steps when resolving an import:
///
/// 1. When an import is encountered for the first time, the content of the corresponding file is
///    read and stored in the file cache (consisting of the file database plus a map between paths
///    and ids in the database, the name-id table). The content is parsed, stored in the term
///    cache, and queued somewhere so that it can undergo the standard
///    [transformations](crate::transform) (including import resolution) later.
/// 2. When it is finally processed, the term cache is updated with the transformed term.
///
/// # RFC007
///
/// Import resolution on the old representation is still needed only because of the evaluator. The
/// typechecker now uses the new AST representation with its own import resolver.
pub trait ImportResolver {
    /// Resolves an import.
    ///
    /// Reads and stores the content of an import, puts it in the file cache (or gets it from
    /// there if it is cached), then parses it and returns the corresponding term and file id.
    ///
    /// The term and the path are provided only if the import is processed for the first time.
    /// Indeed, at import resolution phase, the term of an import encountered for the first time
    /// is queued to be processed (e.g. having its own imports resolved). The path is needed to
    /// resolve nested imports relatively to this parent. Only after this processing is the term
    /// inserted back in the cache. On the other hand, if it has been resolved before, it is
    /// already transformed in the cache and does not need further processing.
    fn resolve(
        &mut self,
        pos_table: &mut PosTable,
        import: &term::Import,
        parent: Option<FileId>,
        pos_idx: PosIdx,
    ) -> Result<(ResolvedTerm, FileId), ImportError>;

    /// Return a reference to the file database.
    fn files(&self) -> &Files;

    /// Get a resolved import from the term cache.
    fn get(&self, file_id: FileId) -> Option<NickelValue>;
    /// Return the (potentially normalized) file path corresponding to the ID of a resolved import.
    fn get_path(&self, file_id: FileId) -> Option<&OsStr>;

    /// Returns the base path for Nix evaluation, which is the parent directory of the source file
    /// if any, or the current working directory, or an empty path if we couldn't determine any of
    /// the previous two.
    ///
    /// This method needs to be here because the evaluator makes use of it (when evaluating the
    /// `eval_nix` primop), but at this stage it only has access to the `ImportResolver` interface.
    /// We could give a default implementation here just using [Self::get_path], but we also need
    /// `get_base_dir_for_nix` in [SourceCache]. We reuse the latter implementation instead of
    /// duplicating a more generic variant here.
    #[cfg(feature = "nix-experimental")]
    fn get_base_dir_for_nix(&self, file_id: FileId) -> PathBuf;
}
1979
impl ImportResolver for CacheHub {
    fn resolve(
        &mut self,
        pos_table: &mut PosTable,
        import: &term::Import,
        parent: Option<FileId>,
        pos_idx: PosIdx,
    ) -> Result<(ResolvedTerm, FileId), ImportError> {
        let pos = pos_table.get(pos_idx);

        // Determine the candidate directories to search, the (relative) path to look up, the
        // package root (for package imports) and the input format of the target.
        let (possible_parents, path, pkg_id, format) = match import {
            term::Import::Path { path, format } => {
                // `parent` is the file that did the import. We first look in its containing directory, followed by
                // the directories in the import path.
                let mut parent_path = parent
                    .and_then(|p| self.get_path(p))
                    .map(PathBuf::from)
                    .unwrap_or_default();
                // Drop the file name to keep only the containing directory.
                parent_path.pop();

                (
                    std::iter::once(parent_path)
                        .chain(self.sources.import_paths.iter().cloned())
                        .collect(),
                    Path::new(path),
                    None,
                    *format,
                )
            }
            term::Import::Package { id } => {
                let package_map = self
                    .sources
                    .package_map
                    .as_ref()
                    .ok_or(ImportErrorKind::NoPackageMap { pos })?;
                let parent_path = parent
                    .and_then(|p| self.sources.packages.get(&p))
                    .map(PathBuf::as_path);
                let pkg_path = package_map.get(parent_path, *id, pos)?;
                (
                    vec![pkg_path.to_owned()],
                    Path::new("main.ncl"),
                    Some(pkg_path.to_owned()),
                    // Packages are always in nickel format
                    InputFormat::Nickel,
                )
            }
        };

        // Try to import from all possibilities, taking the first one that succeeds.
        let (id_op, path_buf) = possible_parents
            .iter()
            .find_map(|parent| {
                let mut path_buf = parent.clone();
                path_buf.push(path);
                self.sources
                    .get_or_add_file(&path_buf, format)
                    .ok()
                    .map(|x| (x, path_buf))
            })
            .ok_or_else(|| {
                // No candidate directory contained the file: report all the places we looked in.
                let parents = possible_parents
                    .iter()
                    .map(|p| p.to_string_lossy())
                    .collect::<Vec<_>>();
                ImportErrorKind::IOError(
                    path.to_string_lossy().into_owned(),
                    format!("could not find import (looked in [{}])", parents.join(", ")),
                    pos,
                )
            })?;

        let (result, file_id) = match id_op {
            CacheOp::Cached(id) => (ResolvedTerm::FromCache, id),
            CacheOp::Done(id) => (ResolvedTerm::FromFile { path: path_buf }, id),
        };

        // Record the dependency edge in both directions (imports and reverse imports).
        if let Some(parent) = parent {
            self.import_data
                .imports
                .entry(parent)
                .or_default()
                .insert(ImportTarget { file_id, format });
            self.import_data
                .rev_imports
                .entry(file_id)
                .or_default()
                .entry(parent)
                .or_insert(pos);
        }

        // Make sure the resolved file is parsed into the term cache; parse failures are
        // surfaced as import errors at the import position.
        self.parse_to_term(pos_table, file_id, format)
            .map_err(|err| ImportErrorKind::ParseErrors(err, pos))?;

        // Remember the package root of this file so that its own package imports resolve
        // relative to it.
        if let Some(pkg_id) = pkg_id {
            self.sources.packages.insert(file_id, pkg_id);
        }

        Ok((result, file_id))
    }

    fn files(&self) -> &Files {
        &self.sources.files
    }

    fn get(&self, file_id: FileId) -> Option<NickelValue> {
        self.terms
            .terms
            .get(&file_id)
            .map(|TermEntry { value, .. }| value.clone())
    }

    fn get_path(&self, file_id: FileId) -> Option<&OsStr> {
        // Only file-backed source paths convert to an `OsStr`; others yield `None`.
        self.sources
            .file_paths
            .get(&file_id)
            .and_then(|p| p.try_into().ok())
    }

    #[cfg(feature = "nix-experimental")]
    fn get_base_dir_for_nix(&self, file_id: FileId) -> PathBuf {
        self.sources.get_base_dir_for_nix(file_id)
    }
}
2104
/// Import resolution for new AST representation (RFC007).
pub trait AstImportResolver {
    /// Resolves an import to an AST.
    ///
    /// Reads and stores the content of an import, puts it in the file cache (or gets it from there
    /// if it is cached), then parses it and returns the corresponding term and file id.
    ///
    /// The term and the path are provided only if the import is processed for the first time.
    /// Indeed, at import resolution phase, the term of an import encountered for the first time is
    /// queued to be processed (e.g. having its own imports resolved). The path is needed to
    /// resolve nested imports relatively to this parent. Only after this processing is the term
    /// inserted back in the cache. On the other hand, if it has been resolved before, it is
    /// already transformed in the cache and does not need further processing.
    ///
    /// # Returns
    ///
    /// [Self::resolve] returns `Ok(None)` if the import is an external format, which can currently
    /// be serialized directly to the runtime representation ([NickelValue]) without going through
    /// an AST. AST import resolution is mostly used by the typechecker, and the typechecker
    /// currently ignores external formats anyway.
    ///
    /// # Lifetimes
    ///
    /// The signature is parametrized by two different lifetimes. This is due mostly to NLS: in the
    /// normal Nickel pipeline, all the ASTs are currently allocated in the same arena, and their
    /// lifetime is the same. However, in NLS, each file needs to be managed separately. At the
    /// import boundary, we're thus not guaranteed to get an AST that lives as long as the one
    /// being currently typechecked.
    fn resolve<'ast_out>(
        &'ast_out mut self,
        import: &ast::Import<'_>,
        pos: &TermPos,
    ) -> Result<Option<&'ast_out Ast<'ast_out>>, ImportErrorKind>;
}
2139
2140/// Normalize the path of a file for unique identification in the cache.
2141///
2142/// The returned path will be an absolute path.
2143pub fn normalize_path(path: impl Into<PathBuf>) -> std::io::Result<PathBuf> {
2144 let mut path = path.into();
2145 if path.is_relative() {
2146 path = std::env::current_dir()?.join(path);
2147 }
2148 Ok(normalize_abs_path(&path))
2149}
2150
/// Normalize the path (assumed to be absolute) of a file for unique identification in the cache.
///
/// This implementation (including the comment below) was taken from cargo-util.
///
/// CAUTION: This does not resolve symlinks (unlike [`std::fs::canonicalize`]). This may cause
/// incorrect or surprising behavior at times. This should be used carefully. Unfortunately,
/// [`std::fs::canonicalize`] can be hard to use correctly, since it can often fail, or on Windows
/// returns annoying device paths. This is a problem Cargo needs to improve on.
pub fn normalize_abs_path(path: &Path) -> PathBuf {
    use std::path::Component;

    let mut components = path.components().peekable();

    // A leading Windows prefix (drive letter, UNC prefix) is kept verbatim at the front.
    let mut normalized = match components.peek().cloned() {
        Some(prefix @ Component::Prefix(..)) => {
            components.next();
            PathBuf::from(prefix.as_os_str())
        }
        _ => PathBuf::new(),
    };

    for component in components {
        match component {
            // The prefix, if any, was consumed above.
            Component::Prefix(..) => unreachable!(),
            Component::RootDir => normalized.push(component.as_os_str()),
            // `.` segments are simply dropped.
            Component::CurDir => (),
            // `..` removes the last kept segment; popping past the root is a no-op.
            Component::ParentDir => {
                normalized.pop();
            }
            Component::Normal(segment) => normalized.push(segment),
        }
    }

    normalized
}
2187
/// Normalize a relative path, removing mid-path `..`s.
///
/// Like [`normalize_abs_path`], this works only on the path itself (i.e. not the filesystem) and
/// does not follow symlinks.
pub fn normalize_rel_path(path: &Path) -> PathBuf {
    use std::path::Component;

    let mut components = path.components().peekable();

    // `..` components that couldn't be cancelled against a preceding segment; they are
    // prepended to the result at the end.
    let mut leading_parents = PathBuf::new();

    let mut normalized = match components.peek().cloned() {
        Some(prefix @ Component::Prefix(..)) => {
            components.next();
            PathBuf::from(prefix.as_os_str())
        }
        _ => PathBuf::new(),
    };

    for component in components {
        match component {
            // The prefix, if any, was consumed above.
            Component::Prefix(..) => unreachable!(),
            Component::RootDir => normalized.push(component.as_os_str()),
            // `.` segments are simply dropped.
            Component::CurDir => (),
            Component::ParentDir => {
                // A `..` cancels the previous segment when there is one; otherwise it must be
                // kept as a leading `..`.
                if !normalized.pop() {
                    leading_parents.push(Component::ParentDir);
                }
            }
            Component::Normal(segment) => normalized.push(segment),
        }
    }

    leading_parents.extend(normalized.components());
    leading_parents
}
2224
/// Returns the last-modification timestamp of a file, or an `Err` if an IO error occurred (e.g.
/// the file doesn't exist or its metadata isn't readable).
pub fn timestamp(path: impl AsRef<OsStr>) -> io::Result<SystemTime> {
    fs::metadata(path.as_ref())?.modified()
}
2229
/// As RFC007 is being rolled out, the typechecker now needs to operate on the new AST. We need a
/// structure that implements [AstImportResolver].
///
/// For borrowing reasons, this can't be all of [CacheHub] or all of [ast_cache::AstCache], as we
/// need to split the different things that are borrowed mutably or immutably. `AstResolver` is a
/// structure that borrows some parts of the cache during its lifetime and will retrieve already
/// imported ASTs, or register the newly imported ones in a separate hashmap that can be added
/// back to the original cache once import resolution is done.
pub struct AstResolver<'ast, 'cache> {
    /// The AST allocator used to parse new sources.
    alloc: &'ast AstAlloc,
    /// The AST cache, which is added to as import resolution progresses.
    asts: &'cache mut HashMap<FileId, AstEntry<'ast>>,
    /// The source cache where new sources will be stored.
    sources: &'cache mut SourceCache,
    /// Direct and reverse dependencies of files (with respect to imports).
    import_data: &'cache mut ImportData,
}
2248
2249impl<'ast, 'cache> AstResolver<'ast, 'cache> {
2250 /// Create a new `AstResolver` from an allocator, an ast cache and a cache hub slice.
2251 pub fn new(
2252 alloc: &'ast AstAlloc,
2253 asts: &'cache mut HashMap<FileId, AstEntry<'ast>>,
2254 slice: CacheHubView<'cache>,
2255 ) -> Self {
2256 Self {
2257 alloc,
2258 asts,
2259 sources: slice.sources,
2260 import_data: slice.import_data,
2261 }
2262 }
2263}
2264
impl AstImportResolver for AstResolver<'_, '_> {
    fn resolve(
        &mut self,
        import: &ast::Import<'_>,
        pos: &TermPos,
    ) -> Result<Option<&Ast<'_>>, ImportErrorKind> {
        // The importing file, if any, is recovered from the position of the import.
        let parent_id = pos.src_id();

        // Determine the candidate directories to search, the (relative) path to look up, the
        // package root (for package imports) and the input format of the target.
        let (possible_parents, path, pkg_id, format) = match import {
            ast::Import::Path { path, format } => {
                // `parent` is the file that did the import. We first look in its containing
                // directory, followed by the directories in the import path.
                let parent_path = parent_id
                    .and_then(|parent| self.sources.file_paths.get(&parent))
                    .and_then(|path| <&OsStr>::try_from(path).ok())
                    .map(PathBuf::from)
                    .map(|mut path| {
                        path.pop();
                        path
                    })
                    // If the parent isn't a proper file, we look in the current directory instead.
                    // This is useful when importing e.g. from the REPL or the CLI directly.
                    .unwrap_or_default();

                (
                    std::iter::once(parent_path)
                        .chain(self.sources.import_paths.iter().cloned())
                        .collect(),
                    Path::new(path),
                    None,
                    *format,
                )
            }
            ast::Import::Package { id } => {
                let package_map = self
                    .sources
                    .package_map
                    .as_ref()
                    .ok_or(ImportErrorKind::NoPackageMap { pos: *pos })?;
                let parent_path = parent_id
                    .and_then(|p| self.sources.packages.get(&p))
                    .map(PathBuf::as_path);
                let pkg_path = package_map.get(parent_path, *id, *pos)?;
                (
                    vec![pkg_path.to_owned()],
                    Path::new("main.ncl"),
                    Some(pkg_path.to_owned()),
                    // Packages are always in nickel format
                    InputFormat::Nickel,
                )
            }
        };

        // Try to import from all possibilities, taking the first one that succeeds.
        let id_op = possible_parents
            .iter()
            .find_map(|parent| {
                let mut path_buf = parent.clone();
                path_buf.push(path);
                self.sources.get_or_add_file(&path_buf, format).ok()
            })
            .ok_or_else(|| {
                // No candidate directory contained the file: report all the places we looked in.
                let parents = possible_parents
                    .iter()
                    .map(|p| p.to_string_lossy())
                    .collect::<Vec<_>>();
                ImportErrorKind::IOError(
                    path.to_string_lossy().into_owned(),
                    format!("could not find import (looked in [{}])", parents.join(", ")),
                    *pos,
                )
            })?;

        let file_id = id_op.inner();

        // Record the dependency edge in both directions (imports and reverse imports).
        if let Some(parent_id) = parent_id {
            self.import_data
                .imports
                .entry(parent_id)
                .or_default()
                .insert(ImportTarget { file_id, format });
            self.import_data
                .rev_imports
                .entry(file_id)
                .or_default()
                .entry(parent_id)
                .or_insert(*pos);
        }

        // Remember the package root of this file so that its own package imports resolve
        // relative to it.
        if let Some(pkg_id) = pkg_id {
            self.sources.packages.insert(file_id, pkg_id);
        }

        if let InputFormat::Nickel = format {
            if let Some(entry) = self.asts.get(&file_id) {
                Ok(Some(entry.ast))
            } else {
                // First encounter: parse the source and move the AST into the arena so that it
                // can be cached and returned by reference.
                let ast = parse_nickel(self.alloc, file_id, self.sources.files.source(file_id))
                    .map_err(|parse_err| ImportErrorKind::ParseErrors(parse_err, *pos))?;
                let ast = self.alloc.alloc(ast);
                self.asts.insert(file_id, AstEntry::new(ast));

                Ok(Some(ast))
            }
        } else {
            // Currently, non-Nickel files are just ignored during the AST phase. They are parsed
            // later directly into the runtime representation.
            Ok(None)
        }
    }
}
2376
/// Provide mockup import resolvers for testing purpose.
pub mod resolvers {
    use super::*;
    use crate::term::Import;

    /// A dummy resolver that panics when asked to do something. Used to test code that contains no
    /// import.
    pub struct DummyResolver {}

    impl ImportResolver for DummyResolver {
        fn resolve(
            &mut self,
            _pos_table: &mut PosTable,
            _import: &Import,
            _parent: Option<FileId>,
            _pos_idx: PosIdx,
        ) -> Result<(ResolvedTerm, FileId), ImportError> {
            panic!("cache::resolvers: dummy resolver should not have been invoked");
        }

        fn files(&self) -> &Files {
            panic!("cache::resolvers: dummy resolver should not have been invoked");
        }

        fn get(&self, _file_id: FileId) -> Option<NickelValue> {
            panic!("cache::resolvers: dummy resolver should not have been invoked");
        }

        fn get_path(&self, _file_id: FileId) -> Option<&OsStr> {
            panic!("cache::resolvers: dummy resolver should not have been invoked");
        }

        #[cfg(feature = "nix-experimental")]
        fn get_base_dir_for_nix(&self, _file_id: FileId) -> PathBuf {
            panic!("cache::resolvers: dummy resolver should not have been invoked");
        }
    }

    /// Resolve imports from a mockup file database. Used to test imports without accessing the
    /// file system. File names are stored as strings, and silently converted from/to `OsString`
    /// when needed: don't use this resolver with source code that imports non UTF-8 paths.
    #[derive(Clone, Default)]
    pub struct SimpleResolver {
        // In-memory file database standing in for the real filesystem.
        files: Files,
        // Maps a mockup file name to its id in `files`.
        file_cache: HashMap<String, FileId>,
        // Parsed contents of already resolved imports.
        term_cache: HashMap<FileId, NickelValue>,
    }

    impl SimpleResolver {
        pub fn new() -> SimpleResolver {
            SimpleResolver::default()
        }

        /// Add a mockup file to available imports.
        pub fn add_source(&mut self, name: String, source: String) {
            let id = self.files.add(name.clone(), source);
            self.file_cache.insert(name, id);
        }
    }

    impl ImportResolver for SimpleResolver {
        fn resolve(
            &mut self,
            pos_table: &mut PosTable,
            import: &Import,
            _parent: Option<FileId>,
            pos_idx: PosIdx,
        ) -> Result<(ResolvedTerm, FileId), ImportError> {
            // The mockup resolver only supports path imports, not package imports.
            let Import::Path { path, .. } = import else {
                panic!("simple resolver doesn't support packages");
            };

            let pos = pos_table.get(pos_idx);

            let file_id = self
                .file_cache
                .get(path.to_string_lossy().as_ref())
                .copied()
                .ok_or_else(|| {
                    ImportErrorKind::IOError(
                        path.to_string_lossy().into_owned(),
                        String::from("Import not found by the mockup resolver."),
                        pos,
                    )
                })?;

            // Parse and cache the term on the first resolution; afterwards, serve it from the
            // cache.
            if let hash_map::Entry::Vacant(e) = self.term_cache.entry(file_id) {
                let buf = self.files.source(file_id);
                let alloc = AstAlloc::new();

                let ast = parser::grammar::TermParser::new()
                    .parse_strict(&alloc, file_id, Lexer::new(buf))
                    .map_err(|e| ImportErrorKind::ParseErrors(e, pos))?;
                e.insert(ast.to_mainline(pos_table));

                Ok((
                    ResolvedTerm::FromFile {
                        path: PathBuf::new(),
                    },
                    file_id,
                ))
            } else {
                Ok((ResolvedTerm::FromCache, file_id))
            }
        }

        fn files(&self) -> &Files {
            &self.files
        }

        fn get(&self, file_id: FileId) -> Option<NickelValue> {
            self.term_cache.get(&file_id).cloned()
        }

        fn get_path(&self, file_id: FileId) -> Option<&OsStr> {
            Some(self.files.name(file_id))
        }

        #[cfg(feature = "nix-experimental")]
        fn get_base_dir_for_nix(&self, file_id: FileId) -> PathBuf {
            self.get_path(file_id)
                .and_then(|path| Path::new(path).parent())
                .map(PathBuf::from)
                .unwrap_or_default()
        }
    }
}
2504
2505/// Parses a Nickel expression from a string.
2506fn parse_nickel<'ast>(
2507 alloc: &'ast AstAlloc,
2508 file_id: FileId,
2509 source: &str,
2510) -> Result<Ast<'ast>, ParseErrors> {
2511 let ast = measure_runtime!(
2512 "runtime:parse:nickel",
2513 parser::grammar::TermParser::new().parse_strict(alloc, file_id, Lexer::new(source))?
2514 );
2515
2516 Ok(ast)
2517}
2518
2519// Parse a Nickel REPL input. In addition to normal Nickel expressions, it can be a top-level let.
2520fn parse_nickel_repl<'ast>(
2521 alloc: &'ast AstAlloc,
2522 file_id: FileId,
2523 source: &str,
2524) -> Result<ExtendedTerm<Ast<'ast>>, ParseErrors> {
2525 let et = measure_runtime!(
2526 "runtime:parse:nickel",
2527 parser::grammar::ExtendedTermParser::new().parse_strict(
2528 alloc,
2529 file_id,
2530 Lexer::new(source)
2531 )?
2532 );
2533
2534 Ok(et)
2535}
2536
2537/// AST cache (for the new [crate::ast::Ast]) that holds the owned allocator of the AST
2538/// nodes.
2539mod ast_cache {
2540 use super::*;
2541 use crate::traverse::TraverseAlloc as _;
2542
    /// The AST cache packing together the AST allocator and the cached ASTs.
    //
    // `#[self_referencing]` (ouroboros) generates the constructor and the accessors used below
    // (`new`, `borrow_*`, `with_*`, `with_mut`): the `asts`, `type_ctxt` and `wildcards` fields
    // all borrow from the sibling `alloc` field.
    #[self_referencing]
    pub struct AstCache {
        /// The allocator hosting AST nodes.
        alloc: AstAlloc,
        /// An AST for each file we have cached.
        #[borrows(alloc)]
        #[covariant]
        asts: HashMap<FileId, AstEntry<'this>>,
        /// The initial typing context. It's morally an option (uninitialized at first), but we
        /// just use an empty context as a default value.
        ///
        /// This context can be augmented through [AstCache::add_repl_binding] and
        /// [AstCache::add_repl_bindings], which is typically used in the REPL to add top-level
        /// bindings.
        #[borrows(alloc)]
        #[not_covariant]
        type_ctxt: typecheck::Context<'this>,
        /// Mapping of each wildcard id to its inferred type, for each file in the cache. This is
        /// the same as [super::WildcardsCache], but in the new AST representation. It is later on
        /// transformed to the runtime representation to populate the wildcard cache.
        #[borrows(alloc)]
        #[covariant]
        wildcards: HashMap<FileId, typecheck::Wildcards<'this>>,
    }
2568
2569 impl AstCache {
        /// Construct a new, empty, AST cache.
        pub fn empty() -> Self {
            // `AstCache::new` is generated by `#[self_referencing]`: each closure builds a field
            // borrowing from the `alloc` passed as the first argument.
            AstCache::new(
                AstAlloc::new(),
                |_alloc| HashMap::new(),
                |_alloc| typecheck::Context::new(),
                |_alloc| HashMap::new(),
            )
        }
2579
        /// Clears the allocator and the cached ASTs.
        ///
        /// Replacing `self` with a fresh cache drops the previous self-referencing struct, which
        /// releases the arena together with everything allocated in it.
        pub fn clear(&mut self) {
            *self = Self::empty();
        }
2584
2585 /// Returns `true` if the AST cache contains an entry for the given file id.
2586 pub fn contains(&self, file_id: FileId) -> bool {
2587 self.borrow_asts().contains_key(&file_id)
2588 }
2589
        /// Returns the underlying allocator, which might be required to call various helpers.
        pub fn get_alloc(&self) -> &AstAlloc {
            // `borrow_alloc` is an accessor generated by `#[self_referencing]`.
            self.borrow_alloc()
        }
2594
2595 /// Returns a reference to a cached AST.
2596 pub fn get(&self, file_id: FileId) -> Option<&Ast<'_>> {
2597 self.borrow_asts().get(&file_id).map(|entry| entry.ast)
2598 }
2599
        /// Returns a reference to a cached AST entry, that is the AST together with its
        /// processing state.
        pub fn get_entry(&self, file_id: FileId) -> Option<&AstEntry<'_>> {
            self.borrow_asts().get(&file_id)
        }
2604
        /// Returns the wildcards associated to an entry, which are populated by
        /// [Self::typecheck]. Returns `None` if the entry has no wildcards recorded.
        pub fn get_wildcards(&self, file_id: FileId) -> Option<&typecheck::Wildcards<'_>> {
            self.borrow_wildcards().get(&file_id)
        }
2609
2610 /// Retrieves the state of an entry. Returns `None` if the entry is not in the AST cache.
2611 pub fn entry_state(&self, file_id: FileId) -> Option<AstEntryState> {
2612 self.borrow_asts()
2613 .get(&file_id)
2614 .map(|AstEntry { state, .. }| *state)
2615 }
2616
2617 /// Updates the state of an entry and returns the previous state, or an error if the entry
2618 /// isn't in the cache.
2619 pub fn update_state(
2620 &mut self,
2621 file_id: FileId,
2622 new: AstEntryState,
2623 ) -> Result<AstEntryState, TermNotFound> {
2624 self.with_asts_mut(|asts| {
2625 asts.get_mut(&file_id)
2626 .map(|AstEntry { state, .. }| std::mem::replace(state, new))
2627 })
2628 .ok_or(TermNotFound)
2629 }
2630
        /// Parses a Nickel expression and stores the corresponding AST in the cache.
        pub fn parse_nickel<'ast>(
            &'ast mut self,
            file_id: FileId,
            source: &str,
        ) -> Result<&'ast Ast<'ast>, ParseErrors> {
            // `with_mut` (generated by ouroboros) gives simultaneous access to the arena and the
            // AST table of the self-referencing struct.
            self.with_mut(|slf| {
                let ast = parse_nickel(slf.alloc, file_id, source)?;
                // Move the parsed AST into the arena, so that it lives as long as the cache.
                let ast = slf.alloc.alloc(ast);
                slf.asts.insert(file_id, AstEntry::new(ast));

                Ok(ast)
            })
        }
2645
        /// Same as [Self::parse_nickel] but accepts the extended syntax allowed in the REPL.
        ///
        /// **Caution**: this method doesn't cache the potential id of a top-level let binding,
        /// although it does save the bound expression, which is required later for typechecking,
        /// program transformation, etc.
        pub fn parse_nickel_repl<'ast>(
            &'ast mut self,
            file_id: FileId,
            source: &str,
        ) -> Result<ExtendedTerm<Ast<'ast>>, ParseErrors> {
            self.with_mut(|slf| {
                let extd_ast = parse_nickel_repl(slf.alloc, file_id, source)?;

                // In both cases, only the (bound) expression is moved to the arena and cached;
                // a top-level let identifier is not stored (see the caution note above).
                let ast = match &extd_ast {
                    ExtendedTerm::Term(t) | ExtendedTerm::ToplevelLet(_, t) => {
                        slf.alloc.alloc(t.clone())
                    }
                };

                slf.asts.insert(file_id, AstEntry::new(ast));

                Ok(extd_ast)
            })
        }
2670
        /// Removes an entry from the AST cache and returns it, if it was present.
        pub fn remove(&mut self, file_id: FileId) -> Option<AstEntry<'_>> {
            self.with_asts_mut(|asts| asts.remove(&file_id))
        }
2674
        /// Typechecks an entry of the cache and updates its state accordingly, or does nothing if
        /// the entry has already been typechecked. Requires that the corresponding source has
        /// been parsed. Note that this method currently fails on a non-Nickel file, that can't
        /// have been parsed to an AST.
        ///
        /// If the source contains imports, recursively typecheck on the imports too.
        ///
        /// # RFC007
        ///
        /// During the transition period between the old VM and the new bytecode VM, this method
        /// performs typechecking on the new representation [crate::ast::Ast].
        pub fn typecheck(
            &mut self,
            mut slice: CacheHubView<'_>,
            file_id: FileId,
            initial_mode: TypecheckMode,
        ) -> Result<CacheOp<()>, AstCacheError<TypecheckError>> {
            // The entry must at least be parsed before we can typecheck it.
            let Some(state) = self.entry_state(file_id) else {
                return Err(CacheError::IncompatibleState {
                    want: AstEntryState::Parsed,
                });
            };

            // If we're already typechecking or we have typechecked the file, we stop right here.
            if state >= AstEntryState::Typechecking {
                return Ok(CacheOp::Cached(()));
            }

            // Protect against cycles in the import graph.
            // unwrap(): we checked at the beginning of this function that the term is in the
            // cache.
            let _ = self
                .update_state(file_id, AstEntryState::Typechecking)
                .unwrap();

            // Ensure the initial typing context is properly initialized.
            self.populate_type_ctxt(slice.sources);
            self.with_mut(|slf| -> Result<(), AstCacheError<TypecheckError>> {
                // unwrap(): we checked at the beginning of this function that the AST cache has an
                // entry for `file_id`.
                let ast = slf.asts.get(&file_id).unwrap().ast;

                // The resolver borrows the AST table mutably: newly imported ASTs are inserted
                // there as typechecking discovers them.
                let mut resolver = AstResolver::new(slf.alloc, slf.asts, slice.reborrow());
                let type_ctxt = slf.type_ctxt.clone();
                let wildcards_map = measure_runtime!(
                    "runtime:type_check",
                    typecheck(slf.alloc, ast, type_ctxt, &mut resolver, initial_mode)?
                );
                slf.wildcards.insert(file_id, wildcards_map);

                Ok(())
            })?;

            // Typecheck dependencies (files imported by this file).
            if let Some(imports) = slice.import_data.imports.get(&file_id) {
                // Because we need to borrow `import_data` for typechecking, we need to release the
                // borrow by moving the content of `imports` somewhere else.
                //
                // We ignore non-Nickel imports, which aren't typechecked, and are currently not
                // even in the AST cache.
                let imports: Vec<_> = imports
                    .iter()
                    .filter_map(|tgt| {
                        if let InputFormat::Nickel = tgt.format {
                            Some(tgt.file_id)
                        } else {
                            None
                        }
                    })
                    .collect();

                for file_id in imports {
                    self.typecheck(slice.reborrow(), file_id, initial_mode)?;
                }
            }

            // unwrap(): we checked at the beginning of this function that the AST is in the
            // cache.
            let _ = self
                .update_state(file_id, AstEntryState::Typechecked)
                .unwrap();

            Ok(CacheOp::Done(()))
        }
2759
2760 /// Typechecks the stdlib. This has to be public because it's used in benches. It probably
2761 /// does not have to be used for something else.
2762 pub fn typecheck_stdlib(
2763 &mut self,
2764 mut slice: CacheHubView<'_>,
2765 ) -> Result<CacheOp<()>, AstCacheError<TypecheckError>> {
2766 let mut ret = CacheOp::Cached(());
2767 self.populate_type_ctxt(slice.sources);
2768
2769 for (_, stdlib_module_id) in slice.sources.stdlib_modules() {
2770 let result =
2771 self.typecheck(slice.reborrow(), stdlib_module_id, TypecheckMode::Walk)?;
2772
2773 if let CacheOp::Done(()) = result {
2774 ret = CacheOp::Done(());
2775 }
2776 }
2777
2778 Ok(ret)
2779 }
2780
    /// Typechecks a file (if it wasn't already) and returns the inferred type, with type
    /// wildcards properly substituted.
    pub fn type_of(
        &mut self,
        mut slice: CacheHubView<'_>,
        file_id: FileId,
    ) -> Result<CacheOp<ast::typ::Type<'_>>, AstCacheError<TypecheckError>> {
        // Make sure the file (and its imports) have been typechecked, so that the wildcards
        // table queried below is populated.
        self.typecheck(slice.reborrow(), file_id, TypecheckMode::Walk)?;

        self.with_mut(|slf| {
            let ast = slf
                .asts
                .get(&file_id)
                .ok_or(CacheError::IncompatibleState {
                    want: AstEntryState::Parsed,
                })?
                .ast;

            let mut resolver = AstResolver::new(slf.alloc, slf.asts, slice.reborrow());
            let type_ctxt = slf.type_ctxt.clone();

            let typ: Result<ast::typ::Type<'_>, _> = TryConvert::try_convert(
                slf.alloc,
                ast.apparent_type(slf.alloc, Some(&type_ctxt.type_env), Some(&mut resolver)),
            );

            // If the apparent type can't be converted to a proper type, fall back to `Dyn`.
            let typ = typ.unwrap_or(ast::typ::TypeF::Dyn.into());

            // unwrap(): we ensured that the file is typechecked, thus its wildcards and its AST
            // must be populated
            let wildcards = slf.wildcards.get(&file_id).unwrap();

            // Substitute each wildcard by its inferred type (or `Dyn` when the wildcard was
            // never resolved). The traversal is infallible, hence the final unwrap().
            Ok(CacheOp::Done(
                typ.traverse(
                    slf.alloc,
                    &mut |ty: ast::typ::Type| -> Result<_, std::convert::Infallible> {
                        if let ast::typ::TypeF::Wildcard(id) = ty.typ {
                            Ok(wildcards
                                .get(id)
                                .cloned()
                                .unwrap_or(ast::typ::Type::from(ast::typ::TypeF::Dyn)))
                        } else {
                            Ok(ty)
                        }
                    },
                    TraverseOrder::TopDown,
                )
                .unwrap(),
            ))
        })
    }
2832
2833 /// If the type context hasn't been created yet, generate and cache the initial typing
2834 /// context from the list of `file_ids` corresponding to the standard library parts.
2835 /// Otherwise, do nothing.
2836 fn populate_type_ctxt(&mut self, sources: &SourceCache) {
2837 self.with_mut(|slf| {
2838 if !slf.type_ctxt.is_empty() {
2839 return;
2840 }
2841 let stdlib_terms_vec: Vec<(StdlibModule, &'_ Ast<'_>)> = sources
2842 .stdlib_modules()
2843 .map(|(module, file_id)| {
2844 let ast = slf.asts.get(&file_id).map(|entry| entry.ast);
2845
2846 (
2847 module,
2848 ast.expect("cache::ast_cache::AstCache::populate_type_ctxt(): can't build environment, stdlib not parsed")
2849 )
2850 })
2851 .collect();
2852
2853 *slf.type_ctxt = typecheck::mk_initial_ctxt(slf.alloc, stdlib_terms_vec).unwrap();
2854 });
2855 }
2856
2857 /// Adds a binding to the type environment. The bound term is identified by its file id
2858 /// `file_id`.
2859 pub fn add_type_binding(
2860 &mut self,
2861 mut slice: CacheHubView<'_>,
2862 id: LocIdent,
2863 file_id: FileId,
2864 ) -> Result<(), AstCacheError<std::convert::Infallible>> {
2865 self.with_mut(|slf| {
2866 let Some(entry) = slf.asts.get(&file_id) else {
2867 return Err(CacheError::IncompatibleState {
2868 want: AstEntryState::Parsed,
2869 });
2870 };
2871
2872 let ast = entry.ast;
2873 let mut resolver = AstResolver::new(slf.alloc, slf.asts, slice.reborrow());
2874
2875 typecheck::env_add(
2876 slf.alloc,
2877 &mut slf.type_ctxt.type_env,
2878 id,
2879 ast,
2880 &slf.type_ctxt.term_env,
2881 &mut resolver,
2882 );
2883 //slf.asts.extend(resolver.new_asts.into_iter());
2884
2885 slf.type_ctxt
2886 .term_env
2887 .0
2888 .insert(id.ident(), (ast.clone(), slf.type_ctxt.term_env.clone()));
2889 Ok(())
2890 })?;
2891
2892 Ok(())
2893 }
2894
2895 /// Add the bindings of a record to the type environment. Ignore fields whose name are
2896 /// defined through interpolation.
2897 pub fn add_type_bindings(
2898 &mut self,
2899 pos_table: &PosTable,
2900 mut slice: CacheHubView<'_>,
2901 term: &NickelValue,
2902 ) -> Result<(), NotARecord> {
2903 self.with_mut(|slf| {
2904 // It's sad, but for now, we have to convert the term back to an AST to insert it in
2905 // the type environment.
2906 let ast = term.to_ast(slf.alloc, pos_table);
2907 let mut resolver = AstResolver::new(slf.alloc, slf.asts, slice.reborrow());
2908
2909 typecheck::env_add_term(
2910 slf.alloc,
2911 &mut slf.type_ctxt.type_env,
2912 ast,
2913 &slf.type_ctxt.term_env,
2914 &mut resolver,
2915 )
2916 .map_err(|_| NotARecord)
2917 })
2918 }
2919 }
2920}
2921
#[cfg(test)]
mod tests {
    use std::path::Path;

    use super::*;

    #[test]
    fn normalize_rel() {
        // Table-driven: each pair is (input, expected normalization).
        let cases = [("../a/../b", "../b"), ("../../a/../b", "../../b")];

        for (input, expected) in cases {
            assert_eq!(
                &normalize_rel_path(Path::new(input)),
                Path::new(expected)
            );
        }
    }

    #[test]
    fn get_cached_source_with_relative_path() {
        let mut sources = SourceCache::new();
        let root = PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("nickel-test-rootdir");
        let key = SourcePath::Path(root.join("file.ncl"), super::InputFormat::Nickel);
        let file_id = sources.replace_string(key, "1".into());

        // This path should not exist on the host but should match the in-memory file that
        // was set up in the cache once normalized.
        let lookup = sources
            .get_or_add_file(
                root.join("subdir").join("..").join("file.ncl"),
                InputFormat::Nickel,
            )
            .expect("Missed cached file when pulling with relative path");
        assert_eq!(CacheOp::Cached(file_id), lookup);
    }

    #[test]
    fn close_file() {
        let mut sources = SourceCache::new();
        let ncl_path = PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("closed.ncl");
        let key = SourcePath::Path(ncl_path.clone(), InputFormat::Nickel);

        sources.add_string(key.clone(), "1".to_string());
        sources
            .close_in_memory_file(ncl_path.clone(), InputFormat::Nickel)
            .unwrap();

        let kind = sources
            .file_ids
            .get(&key)
            .map(|entry| entry.source)
            .unwrap();
        assert_eq!(kind, SourceKind::MemoryClosed);

        // Since the closed file should be stale, id_or_new_timestamp_of should not return
        // the file ID for the closed file. Since in this case the file doesn't exist on the
        // filesystem either, it should return an error.
        assert!(
            sources
                .id_or_new_timestamp_of(&ncl_path, InputFormat::Nickel)
                .is_err()
        );
    }
}
2985}