nickel_lang_core/cache.rs
1//! Various caches for artifacts generated across the whole pipeline: source code, parsed
2//! representations, imports data (dependencies and reverse dependencies, etc.)
3//!
4//! In order to manage the complexity of correctly borrowing such structures, where the arena
5//! allocation of ASTs requires usage of self-borrowing structures, the main cache is split in
6//! different subcaches that can be borrowed independently.
7pub use ast_cache::AstCache;
8
9pub use nickel_lang_parser::ast::InputFormat;
10
11use crate::{
12 ast::{
13 self, Ast, AstAlloc, TryConvert,
14 compat::{ToAst, ToMainline},
15 },
16 closurize::Closurize as _,
17 error::{Error, ImportError, ImportErrorKind, ParseError, ParseErrors, TypecheckError},
18 eval::{self, cache::Cache as EvalCache, value::NickelValue},
19 files::{FileId, Files},
20 identifier::LocIdent,
21 metrics::measure_runtime,
22 package::PackageMap,
23 parser::{self, ErrorTolerantParser, ExtendedTerm, lexer::Lexer},
24 position::{PosIdx, PosTable, TermPos},
25 program::FieldPath,
26 serialize::yaml::Listify,
27 stdlib::{self as nickel_stdlib, StdlibModule},
28 term::{self},
29 transform::{self, Wildcards, import_resolution},
30 traverse::TraverseOrder,
31 typ::UnboundTypeVariableError,
32 typecheck::{self, HasApparentType, TypecheckMode, typecheck},
33};
34
35#[cfg(feature = "nix-experimental")]
36use crate::nix_ffi;
37
38use std::{
39 collections::{HashMap, HashSet, hash_map},
40 ffi::{OsStr, OsString},
41 fmt, fs,
42 io::{self, Read},
43 path::{Path, PathBuf},
44 result::Result,
45 sync::Arc,
46 time::SystemTime,
47};
48
49use ouroboros::self_referencing;
50
/// Error when trying to add bindings to the typing context where the given term isn't a record
/// literal.
//
// Derives added so this public error type can be used with `Debug`-requiring APIs
// (`unwrap`/`expect`), copied freely, and compared in tests, consistently with [TermNotFound].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct NotARecord;
54
55/// The term cache stores the parsed values (the runtime representation) of sources.
56#[derive(Debug, Clone)]
57pub struct TermCache {
58 /// The term table stores parsed terms corresponding to the entries of the file database.
59 terms: HashMap<FileId, TermEntry>,
60}
61
/// Error returned by [TermCache] operations when the requested file id has no entry in the term
/// cache.
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
pub struct TermNotFound;
64
impl TermCache {
    /// Creates a new, empty term cache.
    pub fn new() -> Self {
        TermCache {
            terms: HashMap::new(),
        }
    }

    /// Updates the state of an entry and returns the previous state, or an error if the entry
    /// isn't in the cache.
    pub fn update_state(
        &mut self,
        file_id: FileId,
        new: TermEntryState,
    ) -> Result<TermEntryState, TermNotFound> {
        self.terms
            .get_mut(&file_id)
            .map(|TermEntry { state, .. }| std::mem::replace(state, new))
            .ok_or(TermNotFound)
    }

    /// Applies program transformations, except for import resolution, which is implemented in a
    /// separate phase. Imports of the entry are transformed recursively as well.
    fn transform(
        &mut self,
        pos_table: &mut PosTable,
        wildcards: &WildcardsCache,
        import_data: &ImportData,
        file_id: FileId,
    ) -> Result<CacheOp<()>, TermCacheError<UnboundTypeVariableError>> {
        match self.terms.get(&file_id).map(|entry| entry.state) {
            Some(state) if state >= TermEntryState::Transformed => Ok(CacheOp::Cached(())),
            Some(state) => {
                // If the state is already `Transforming`, we've reached this entry again through
                // a cyclic import chain: skip the work (but still report `Done`) so the recursion
                // terminates.
                if state < TermEntryState::Transforming {
                    let cached_term = self.terms.remove(&file_id).unwrap();
                    let term = transform::transform(
                        pos_table,
                        cached_term.value,
                        wildcards.wildcards.get(&file_id),
                    )?;
                    // Re-insert the entry in the `Transforming` state *before* recursing into
                    // imports, so that import cycles hit the guard above instead of looping.
                    self.insert(
                        file_id,
                        TermEntry {
                            value: term,
                            state: TermEntryState::Transforming,
                            ..cached_term
                        },
                    );

                    let imported: Vec<_> = import_data.imports(file_id).collect();
                    for file_id in imported {
                        self.transform(pos_table, wildcards, import_data, file_id)?;
                    }

                    // unwrap(): we re-inserted the entry after removal and transformation, so it
                    // should be in the cache.
                    let _ = self
                        .update_state(file_id, TermEntryState::Transformed)
                        .unwrap();
                }

                Ok(CacheOp::Done(()))
            }
            None => Err(CacheError::IncompatibleState {
                want: TermEntryState::Populated,
            }),
        }
    }

    /// Retrieves the state of an entry. Returns `None` if the entry is not in the term cache. This
    /// might happen if the file hasn't been parsed, or if the term cache hasn't been filled from
    /// the AST cache yet. The latter is supposed to happen right before program transformations.
    pub fn entry_state(&self, file_id: FileId) -> Option<TermEntryState> {
        self.terms
            .get(&file_id)
            .map(|TermEntry { state, .. }| *state)
    }

    /// Replaces a cache entry by a closurized version of itself. If it contains imports,
    /// closurize them recursively.
    ///
    /// Closurization is not required before evaluation, but it has two benefits:
    ///
    /// - the closurized term uses the evaluation cache, so if it is imported in multiple
    ///   places then they will share a cache
    /// - the eval cache's built-in mechanism for preventing infinite recursion will also
    ///   apply to recursive imports.
    ///
    /// The main disadvantage of closurization is that it makes the resulting runtime
    /// representation less useful. You wouldn't want to closurize before pretty-printing, for
    /// example. This isn't as important these days, since we also have the AST representation at
    /// hand.
    pub fn closurize<C: EvalCache>(
        &mut self,
        cache: &mut C,
        import_data: &ImportData,
        file_id: FileId,
    ) -> Result<CacheOp<()>, TermCacheError<()>> {
        match self.entry_state(file_id) {
            Some(state) if state >= TermEntryState::Closurized => Ok(CacheOp::Cached(())),
            Some(_) => {
                // unwrap(): `entry_state` returned `Some`, so the entry is in the cache.
                let cached_term = self.terms.remove(&file_id).unwrap();
                let term = cached_term.value.closurize(cache, eval::Environment::new());
                // Mark the entry as `Closurized` *before* recursing into imports, so that import
                // cycles hit the `Cached` branch above and the recursion terminates.
                self.insert(
                    file_id,
                    TermEntry {
                        value: term,
                        state: TermEntryState::Closurized,
                        ..cached_term
                    },
                );

                let imported: Vec<_> = import_data.imports(file_id).collect();
                for file_id in imported {
                    self.closurize(cache, import_data, file_id)?;
                }

                Ok(CacheOp::Done(()))
            }
            None => Err(CacheError::IncompatibleState {
                want: TermEntryState::Populated,
            }),
        }
    }

    /// Returns an immutable reference to the whole term cache.
    pub fn terms(&self) -> &HashMap<FileId, TermEntry> {
        &self.terms
    }

    /// Retrieves a fresh clone of a cached term.
    pub fn get_owned(&self, file_id: FileId) -> Option<NickelValue> {
        self.terms
            .get(&file_id)
            .map(|TermEntry { value: term, .. }| term.clone())
    }

    /// Retrieves a reference to a cached term.
    pub fn get(&self, file_id: FileId) -> Option<&NickelValue> {
        self.terms
            .get(&file_id)
            .map(|TermEntry { value: term, .. }| term)
    }

    /// Retrieves the whole entry for a given file id.
    pub fn get_entry(&self, file_id: FileId) -> Option<&TermEntry> {
        self.terms.get(&file_id)
    }

    /// Returns `true` if the term cache contains a term for the given file id.
    pub fn contains(&self, file_id: FileId) -> bool {
        self.terms.contains_key(&file_id)
    }

    /// Inserts a new entry in the cache. Usually, this should be handled by [CacheHub] directly,
    /// but there are some use-cases where it is useful to pre-fill the term cache (typically in
    /// NLS).
    pub fn insert(&mut self, file_id: FileId, entry: TermEntry) {
        self.terms.insert(file_id, entry);
    }
}
224
/// This is a temporary fix for [#2362](https://github.com/tweag/nickel/issues/2362). File paths
/// prefixed with this are treated specially: they can refer to in-memory source. To build an
/// import expression that refers to an in-memory source, append the source name to this prefix and
/// use it as the path: `format!("{IN_MEMORY_SOURCE_PATH_PREFIX}{src_name}")`.
pub const IN_MEMORY_SOURCE_PATH_PREFIX: &str = "%inmem_src%:";
230
/// The source cache handles reading textual data from the file system or other sources and storing
/// it in a [Files] instance.
///
/// While not ideal, we have to make most of the fields public to allow the LSP to perform its own
/// import resolution.
#[derive(Clone)]
pub struct SourceCache {
    /// The content of the program sources plus imports.
    pub files: Files,
    /// Reverse map from file ids to source paths.
    pub file_paths: HashMap<FileId, SourcePath>,
    /// The name-id table, holding file ids stored in the database indexed by source names.
    pub file_ids: HashMap<SourcePath, NameIdEntry>,
    /// Paths where to look for imports, as included by the user through either the CLI argument
    /// `--import-path` or the environment variable `$NICKEL_IMPORT_PATH`.
    pub import_paths: Vec<PathBuf>,
    /// A table mapping FileIds to the package that they belong to.
    ///
    /// Path dependencies have already been canonicalized to absolute paths.
    pub packages: HashMap<FileId, PathBuf>,
    /// The map used to resolve package imports.
    pub package_map: Option<PackageMap>,
}
254
255impl SourceCache {
256 pub fn new() -> Self {
257 let files =
258 Files::new(crate::stdlib::modules().map(|m| (m.file_name().to_owned(), m.content())));
259 SourceCache {
260 files,
261 file_paths: HashMap::new(),
262 file_ids: HashMap::new(),
263 import_paths: Vec::new(),
264 packages: HashMap::new(),
265 package_map: None,
266 }
267 }
268
269 /// Retrieves the name of a source given an id.
270 pub fn name(&self, file_id: FileId) -> &OsStr {
271 self.files.name(file_id)
272 }
273
274 /// Add paths to the import path list, where the resolver is looking for imported files.
275 pub fn add_import_paths<P>(&mut self, paths: impl Iterator<Item = P>)
276 where
277 PathBuf: From<P>,
278 {
279 self.import_paths.extend(paths.map(PathBuf::from));
280 }
281
282 /// Sets the package map to use for package import resolution.
283 pub fn set_package_map(&mut self, map: PackageMap) {
284 self.package_map = Some(map);
285 }
286
287 /// Same as [Self::add_file], but assumes that the path is already normalized and takes the
288 /// timestamp as a parameter.
289 fn add_normalized_file(
290 &mut self,
291 path: PathBuf,
292 format: InputFormat,
293 timestamp: SystemTime,
294 ) -> io::Result<FileId> {
295 let contents = std::fs::read_to_string(&path)?;
296 let file_id = self.files.add(&path, contents);
297
298 self.file_paths
299 .insert(file_id, SourcePath::Path(path.clone(), format));
300 self.file_ids.insert(
301 SourcePath::Path(path, format),
302 NameIdEntry {
303 id: file_id,
304 source: SourceKind::Filesystem(timestamp),
305 },
306 );
307 Ok(file_id)
308 }
309
310 /// Loads a file and adds it to the name-id table.
311 ///
312 /// Uses the normalized path and the *modified at* timestamp as the name-id table entry.
313 /// Overrides any existing entry with the same name.
314 pub fn add_file(
315 &mut self,
316 path: impl Into<OsString>,
317 format: InputFormat,
318 ) -> io::Result<FileId> {
319 let path = path.into();
320 let timestamp = timestamp(&path)?;
321 let normalized = normalize_path(&path)?;
322 self.add_normalized_file(normalized, format, timestamp)
323 }
324
325 /// Try to retrieve the id of a file from the cache.
326 ///
327 /// If it was not in cache, try to read it and add it as a new entry.
328 ///
329 /// # In memory sources
330 ///
331 /// As a temporary fix for [#2362](https://github.com/tweag/nickel/issues/2362), if a file path
332 /// starts with [IN_MEMORY_SOURCE_PATH_PREFIX], the suffix is looked up un-normalized value
333 /// first, which makes it possible to hit in-memory only sources by importing a path
334 /// `"{SOURCE_PATH_PREFX}{src_name}"`. If it can't be found, it is looked up normally, so that
335 /// it doesn't break strange file names that happen to contain the source path prefix.
336 ///
337 /// It is theoretically possible that if both the source "abc" and the file
338 /// "{IN_MEMORY_SOURCE_PATH_PREFIX}abc" exist, the source is imported instead of the intended
339 /// file. However, given the prefix, it just can't be accidental. As we want to give access to
340 /// in-memory sources in any case, although this can be surprising, I don't see any obvious
341 /// attack scenario here. This fix is also intended to be temporary. If you still need to make
342 /// sure this doesn't happen, one way would be to add some randomness to the name of the
343 /// sources, so that they can't be predicted beforehand.
344 pub fn get_or_add_file(
345 &mut self,
346 path: impl Into<OsString>,
347 format: InputFormat,
348 ) -> io::Result<CacheOp<FileId>> {
349 let path = path.into();
350 let normalized = normalize_path(&path)?;
351
352 // Try to fetch a generated source if the path starts with a hardcoded prefix
353 let generated_entry = path
354 .to_str()
355 .and_then(|p| p.strip_prefix(IN_MEMORY_SOURCE_PATH_PREFIX))
356 .and_then(|src_name| {
357 self.file_ids
358 .get(&SourcePath::Path(src_name.into(), format))
359 });
360
361 if let Some(entry) = generated_entry {
362 return Ok(CacheOp::Cached(entry.id));
363 }
364
365 match self.id_or_new_timestamp_of(normalized.as_ref(), format)? {
366 SourceState::UpToDate(id) => Ok(CacheOp::Cached(id)),
367 SourceState::Stale(timestamp) => self
368 .add_normalized_file(normalized, format, timestamp)
369 .map(CacheOp::Done),
370 }
371 }
372
373 /// Load a source and add it to the name-id table.
374 ///
375 /// Do not check if a source with the same name already exists: if it is the case,
376 /// [Self::add_source] will happily will override the old entry in the name-id table.
377 pub fn add_source<T>(&mut self, source_name: SourcePath, mut source: T) -> io::Result<FileId>
378 where
379 T: Read,
380 {
381 let mut buffer = String::new();
382 source.read_to_string(&mut buffer)?;
383 Ok(self.add_string(source_name, buffer))
384 }
385
386 /// Returns the content of a file.
387 ///
388 /// Panics if the file id is invalid.
389 pub fn source(&self, id: FileId) -> &str {
390 self.files.source(id)
391 }
392
393 /// Returns a cloned `Arc` to the content of the file.
394 ///
395 /// The `Arc` is here for the LSP, where the background evaluation is handled by background
396 /// threads and processes.
397 ///
398 /// Panics if the file id is invalid.
399 pub fn clone_source(&self, id: FileId) -> Arc<str> {
400 self.files.clone_source(id)
401 }
402
403 /// Loads a new source as a string and add it to the name-id table.
404 ///
405 /// Do not check if a source with the same name already exists: if it is the case, this one
406 /// will override the old entry in the name-id table but the old `FileId` will remain valid.
407 pub fn add_string(&mut self, source_name: SourcePath, s: String) -> FileId {
408 let id = self.files.add(source_name.clone(), s);
409
410 self.file_paths.insert(id, source_name.clone());
411 self.file_ids.insert(
412 source_name,
413 NameIdEntry {
414 id,
415 source: SourceKind::Memory,
416 },
417 );
418 id
419 }
420
421 /// Loads a new source as a string, replacing any existing source with the same name.
422 ///
423 /// As opposed to [CacheHub::replace_string], this method doesn't update the other caches. It
424 /// just affects the source cache.
425 pub fn replace_string(&mut self, source_name: SourcePath, s: String) -> FileId {
426 if let Some(file_id) = self.id_of(&source_name) {
427 // The file may have been originally loaded from the filesystem and then
428 // updated by the LSP, so the SourceKind needs to be updated to Memory.
429 self.file_ids.insert(
430 source_name,
431 NameIdEntry {
432 id: file_id,
433 source: SourceKind::Memory,
434 },
435 );
436 self.files.update(file_id, s);
437 file_id
438 } else {
439 // We re-use [Self::add_string] here to properly fill the file_paths and file_ids
440 // tables.
441 self.add_string(source_name, s)
442 }
443 }
444
445 /// Closes a file that has been opened in memory and reloads it from the filesystem.
446 /// Returns the file ID of the replacement file loaded from the filesystem.
447 pub fn close_in_memory_file(
448 &mut self,
449 path: PathBuf,
450 format: InputFormat,
451 ) -> Result<FileCloseResult, FileCloseError> {
452 let entry = self
453 .file_ids
454 .get_mut(&SourcePath::Path(path.clone(), format))
455 .ok_or(FileCloseError::FileIdNotFound)?;
456 match &entry.source {
457 SourceKind::Memory => {
458 let closed_id = entry.id;
459 entry.source = SourceKind::MemoryClosed;
460 let replacement_id = self.get_or_add_file(path, format).map(|op| op.inner());
461 Ok(FileCloseResult {
462 closed_id,
463 replacement_id,
464 })
465 }
466 _ => Err(FileCloseError::FileNotOpen),
467 }
468 }
469
470 /// Retrieves the id of a source given a name.
471 ///
472 /// Note that files added via [Self::add_file] are indexed by their full normalized path (cf
473 /// [normalize_path]).
474 pub fn id_of(&self, name: &SourcePath) -> Option<FileId> {
475 match name {
476 SourcePath::Path(p, fmt) => match self.id_or_new_timestamp_of(p, *fmt).ok()? {
477 SourceState::UpToDate(id) => Some(id),
478 SourceState::Stale(_) => None,
479 },
480 name => Some(self.file_ids.get(name)?.id),
481 }
482 }
483
484 /// Tries to retrieve the id of a cached source.
485 ///
486 /// Only returns `Ok` if the source is up-to-date; if the source is stale, returns
487 /// either the new timestamp of the up-to-date file or the error we encountered when
488 /// trying to read it (which most likely means there was no such file).
489 ///
490 /// The main point of this awkward signature is to minimize I/O operations: if we accessed
491 /// the timestamp, keep it around.
492 fn id_or_new_timestamp_of(&self, name: &Path, format: InputFormat) -> io::Result<SourceState> {
493 match self
494 .file_ids
495 .get(&SourcePath::Path(name.to_owned(), format))
496 {
497 None
498 | Some(NameIdEntry {
499 source: SourceKind::MemoryClosed,
500 ..
501 }) => Ok(SourceState::Stale(timestamp(name)?)),
502 Some(NameIdEntry {
503 id,
504 source: SourceKind::Filesystem(ts),
505 }) => {
506 let new_timestamp = timestamp(name)?;
507 if ts == &new_timestamp {
508 Ok(SourceState::UpToDate(*id))
509 } else {
510 Ok(SourceState::Stale(new_timestamp))
511 }
512 }
513 Some(NameIdEntry {
514 id,
515 source: SourceKind::Memory,
516 }) => Ok(SourceState::UpToDate(*id)),
517 }
518 }
519
520 /// Gets a reference to the underlying files. Required by the WASM REPL error reporting code
521 /// and LSP functions.
522 pub fn files(&self) -> &Files {
523 &self.files
524 }
525
526 /// Parses a Nickel source without querying nor populating other caches.
527 pub fn parse_nickel<'ast>(
528 &self,
529 // We take the allocator explicitly, to make sure `self.asts` is properly initialized
530 // before calling this function, and won't be dropped.
531 alloc: &'ast AstAlloc,
532 file_id: FileId,
533 ) -> Result<Ast<'ast>, ParseErrors> {
534 parse_nickel(alloc, file_id, self.files.source(file_id))
535 }
536
537 /// Parses a source that isn't Nickel code without querying nor populating the other caches. Support
538 /// multiple formats.
539 ///
540 /// The Nickel/non Nickel distinction is a bit artificial at the moment, due to the fact that
541 /// parsing Nickel returns the new [crate::ast::Ast], while parsing other formats
542 /// don't go through the new AST first but directly deserialize to the legacy
543 /// [crate::term::Term] for simplicity and performance reasons.
544 ///
545 /// Once RFC007 is fully implemented, we might clean it up.
546 ///
547 /// # Panic
548 ///
549 /// This function panics if `format` is [InputFormat::Nickel].
550 pub fn parse_other(
551 &self,
552 pos_table: &mut PosTable,
553 file_id: FileId,
554 format: InputFormat,
555 ) -> Result<NickelValue, ParseError> {
556 let whole_span: TermPos = self.files.source_span(file_id).into();
557 let pos_idx = pos_table.push(whole_span);
558
559 let source = self.files.source(file_id);
560
561 match format {
562 InputFormat::Nickel => {
563 // Panicking isn't great, but we expect this to be temporary, until RFC007 is fully
564 // implemented. And this case is an internal bug.
565 panic!("error: trying to parse a Nickel source with parse_other_nocache")
566 }
567 InputFormat::Json => {
568 crate::serialize::yaml::load_json_value(pos_table, source, Some(file_id))
569 }
570 InputFormat::Yaml => crate::serialize::yaml::load_yaml_value(
571 pos_table,
572 source,
573 Some(file_id),
574 Listify::Auto,
575 ),
576 InputFormat::Toml => crate::serialize::toml_deser::from_str(pos_table, source, file_id)
577 .map(|v: NickelValue| v.with_pos_idx(pos_idx))
578 .map_err(|err| ParseError::from_toml(err, file_id)),
579 #[cfg(feature = "nix-experimental")]
580 InputFormat::Nix => {
581 let json = nix_ffi::eval_to_json(source, &self.get_base_dir_for_nix(file_id))
582 .map_err(|e| ParseError::from_nix(e.what(), file_id))?;
583 serde_json::from_str(&json)
584 .map(|v: NickelValue| v.with_pos_idx(pos_idx))
585 .map_err(|err| ParseError::from_serde_json(err, Some((file_id, &self.files))))
586 }
587 InputFormat::Text => Ok(NickelValue::string(source, pos_idx)),
588 }
589 }
590
591 /// Returns true if a particular file id represents a Nickel standard library file, false
592 /// otherwise.
593 pub fn is_stdlib_module(&self, file: FileId) -> bool {
594 self.files.is_stdlib(file)
595 }
596
597 /// Retrieves the file id for a given standard libray module.
598 pub fn get_submodule_file_id(&self, module: StdlibModule) -> Option<FileId> {
599 self.stdlib_modules()
600 .find(|(m, _id)| m == &module)
601 .map(|(_, id)| id)
602 }
603
604 /// Returns the list of file ids corresponding to the standard library modules.
605 pub fn stdlib_modules(&self) -> impl Iterator<Item = (StdlibModule, FileId)> + use<> {
606 let ids = self.files.stdlib_modules();
607 crate::stdlib::modules().into_iter().zip(ids)
608 }
609
610 /// Return the format of a given source. Returns `None` if there is no entry in the source
611 /// cache for `file_id`, or if there is no well-defined input format (e.g. for REPL inputs,
612 /// field assignments, etc.).
613 pub fn input_format(&self, file_id: FileId) -> Option<InputFormat> {
614 self.file_paths
615 .get(&file_id)
616 .and_then(|source| match source {
617 SourcePath::Path(_, input_format) => Some(*input_format),
618 SourcePath::Std(_) => Some(InputFormat::Nickel),
619 SourcePath::Snippet(_)
620 | SourcePath::Query
621 | SourcePath::ReplInput(_)
622 | SourcePath::ReplTypecheck
623 | SourcePath::ReplQuery
624 | SourcePath::CliFieldAssignment
625 | SourcePath::Override(_)
626 | SourcePath::Generated(_) => None,
627 })
628 }
629
630 /// Returns the base path for Nix evaluation, which is the parent directory of the source file
631 /// if any, or the current working directory, or an empty path if we couldn't find any better.
632 #[cfg(feature = "nix-experimental")]
633 fn get_base_dir_for_nix(&self, file_id: FileId) -> PathBuf {
634 let parent_dir = self
635 .file_paths
636 .get(&file_id)
637 .and_then(|source_path| Path::new(<&OsStr>::try_from(source_path).ok()?).parent());
638
639 parent_dir
640 .map(PathBuf::from)
641 .or_else(|| std::env::current_dir().ok())
642 .unwrap_or_default()
643 }
644}
645
/// Stores the mapping of each wildcard id to its inferred type, for each file in the cache.
#[derive(Default, Clone, Debug)]
pub struct WildcardsCache {
    /// Inferred wildcard types, indexed by the file they were inferred in.
    wildcards: HashMap<FileId, Wildcards>,
}
651
impl WildcardsCache {
    /// Creates a new, empty wildcards cache.
    pub fn new() -> Self {
        Self::default()
    }

    /// Returns the wildcard types inferred for the given file, or `None` if there is no entry for
    /// this file.
    pub fn get(&self, file_id: FileId) -> Option<&Wildcards> {
        self.wildcards.get(&file_id)
    }
}
661
/// Metadata about an imported file.
#[derive(Clone, Copy, Debug, Hash, Eq, PartialEq)]
pub struct ImportTarget {
    /// The file id of the imported source.
    pub file_id: FileId,
    /// The input format the source is imported as.
    pub format: InputFormat,
}
668
/// Stores dependencies and reverse dependencies data between sources.
#[derive(Default, Clone)]
pub struct ImportData {
    /// A map containing for each FileId a list of files they import (directly).
    pub imports: HashMap<FileId, HashSet<ImportTarget>>,
    /// A map containing for each FileId a list of files importing them (directly). Note that we
    /// don't need to store the format here, as only Nickel files can import other files. We do
    /// however store the position of the first import expression (the same file can be imported
    /// many times from a given file), for error reporting purposes.
    pub rev_imports: HashMap<FileId, HashMap<FileId, TermPos>>,
}
680
681impl ImportData {
682 pub fn new() -> Self {
683 Self::default()
684 }
685
686 /// Returns the set of files that this file imports.
687 pub fn imports(&self, file: FileId) -> impl Iterator<Item = FileId> + '_ {
688 self.imports
689 .get(&file)
690 .into_iter()
691 .flat_map(|s| s.iter())
692 .map(|tgt| tgt.file_id)
693 }
694
695 /// Returns the set of files that import this file.
696 pub fn rev_imports(&self, file: FileId) -> impl Iterator<Item = FileId> + '_ {
697 self.rev_imports
698 .get(&file)
699 .into_iter()
700 .flat_map(|h| h.keys())
701 .copied()
702 }
703
704 /// Returns the set of files that transitively depend on this file.
705 pub fn transitive_rev_imports(&self, file: FileId) -> HashSet<FileId> {
706 let mut ret = HashSet::new();
707 let mut stack = vec![file];
708
709 while let Some(file) = stack.pop() {
710 for f in self.rev_imports(file) {
711 if ret.insert(f) {
712 stack.push(f);
713 }
714 }
715 }
716
717 ret
718 }
719
720 /// Returns the set of files that this file transitively depends on.
721 pub fn transitive_imports(&self, file: FileId) -> HashSet<FileId> {
722 let mut ret = HashSet::new();
723 let mut stack = vec![file];
724
725 while let Some(file) = stack.pop() {
726 for f in self.imports(file) {
727 if ret.insert(f) {
728 stack.push(f);
729 }
730 }
731 }
732
733 ret
734 }
735
736 /// Returns `true` if those import data are empty.
737 pub fn is_empty(&self) -> bool {
738 self.imports.is_empty() && self.rev_imports.is_empty()
739 }
740}
741
/// The cache hub aggregates the various kind of source-related caches used by Nickel.
///
/// [CacheHub] handles parsing, typechecking and program transformation of sources, as well as
/// caching the corresponding artifacts (text, ASTs, state). This is the central entry point for
/// other modules.
///
/// # RFC007
///
/// As part of the migration to a new AST required by RFC007, as long as we don't have a fully
/// working bytecode virtual machine, the cache needs to keep parsed expressions both as the old
/// representation (dubbed "mainline" or the runtime representation in many places) and as the new
/// AST representation.
pub struct CacheHub {
    /// The cache of terms in the legacy runtime representation.
    pub terms: TermCache,
    /// The cache of source texts and their file ids.
    pub sources: SourceCache,
    /// The cache of parsed ASTs (the new, RFC007 representation).
    pub asts: AstCache,
    /// The inferred types of typing wildcards, per file.
    pub wildcards: WildcardsCache,
    /// Import dependencies and reverse dependencies between sources.
    pub import_data: ImportData,
    #[cfg(debug_assertions)]
    /// Skip loading the stdlib, used for debugging purposes
    pub skip_stdlib: bool,
}
764
765impl CacheHub {
    /// Creates a new cache hub with all subcaches empty (the source cache is pre-populated with
    /// the stdlib sources, as per [SourceCache::new]).
    pub fn new() -> Self {
        CacheHub {
            terms: TermCache::new(),
            sources: SourceCache::new(),
            asts: AstCache::empty(),
            wildcards: WildcardsCache::new(),
            import_data: ImportData::new(),
            #[cfg(debug_assertions)]
            skip_stdlib: false,
        }
    }
777
778 /// Actual implementation of [Self::parse_ast] which doesn't take `self` as a parameter, so that it
779 /// can be reused from other places when we don't have a full [CacheHub] instance at hand.
780 fn parse_ast_impl(
781 asts: &mut AstCache,
782 sources: &mut SourceCache,
783 file_id: FileId,
784 ) -> Result<CacheOp<()>, ParseErrors> {
785 if asts.contains(file_id) {
786 Ok(CacheOp::Cached(()))
787 } else {
788 let _ = asts.parse_nickel(file_id, sources.files.source(file_id))?;
789 Ok(CacheOp::Done(()))
790 }
791 }
792
    /// Parse a REPL input and populate the corresponding entry in the cache.
    ///
    /// The first component of the tuple in the `Ok` case is the identifier of the toplevel let, if
    /// the input is a toplevel let, or `None` if the input is a standard Nickel expression.
    ///
    /// # RFC007
    ///
    /// This method populates both the ast cache and the term cache at once.
    pub fn parse_repl(
        &mut self,
        pos_table: &mut PosTable,
        file_id: FileId,
    ) -> Result<CacheOp<Option<LocIdent>>, ParseErrors> {
        // Since we need the identifier, we always reparse the input. In any case, it doesn't
        // happen that we see the same REPL input twice right now, so caching it is in fact
        // useless. It's just much simpler to reuse the cache infrastructure than to reimplement
        // the whole transformations and import dependencies tracking elsewhere.
        let extd_ast = self
            .asts
            .parse_nickel_repl(file_id, self.sources.files.source(file_id))?;

        let (id, ast) = match extd_ast {
            ExtendedTerm::Term(t) => (None, t),
            ExtendedTerm::ToplevelLet(id, t) => (Some(id), t),
        };

        // Convert the freshly parsed AST to the legacy runtime representation for the term cache.
        let term = measure_runtime!("runtime:ast_conversion", ast.to_mainline(pos_table));

        self.terms.insert(
            file_id,
            TermEntry {
                value: term,
                state: TermEntryState::default(),
                format: InputFormat::Nickel,
            },
        );

        Ok(CacheOp::Done(id))
    }
832
    /// Parses a source and populates the corresponding entry in the AST cache, or does nothing if
    /// the entry has already been parsed. External input formats are currently directly parsed to
    /// the runtime representation, without going through an AST: currently, the format is assumed
    /// to be [InputFormat::Nickel] in this method. See [Self::parse_to_term] for other formats.
    ///
    /// # RFC007
    ///
    /// This method only populates the AST cache. The term cache must be filled separately.
    pub fn parse_to_ast(&mut self, file_id: FileId) -> Result<CacheOp<()>, ParseErrors> {
        Self::parse_ast_impl(&mut self.asts, &mut self.sources, file_id)
    }
844
    /// Parses a source or compiles an AST into the term cache:
    ///
    /// - if the entry is already in the term cache, do nothing.
    /// - if the format is Nickel and there is a corresponding entry in the AST cache, converts the
    ///   parsed AST to a [NickelValue] and put it in the term cache.
    /// - if the format is Nickel but there is no cached AST, or if the format is not Nickel, parse
    ///   the input directly into the term cache.
    ///
    /// Mostly used during ([NickelValue]-based) import resolution.
    pub fn parse_to_term(
        &mut self,
        pos_table: &mut PosTable,
        file_id: FileId,
        format: InputFormat,
    ) -> Result<CacheOp<()>, ParseErrors> {
        if self.terms.contains(file_id) {
            return Ok(CacheOp::Cached(()));
        }

        let term = if let InputFormat::Nickel = format {
            // First try to compile from the AST cache. On failure, the error is discarded and we
            // fall back to reparsing the source from scratch with a throwaway allocator —
            // presumably compilation only fails when there is no cached AST (TODO confirm against
            // `compile`'s error cases).
            match self.compile(pos_table, file_id) {
                Ok(cache_op) => return Ok(cache_op),
                Err(_) => {
                    let alloc = AstAlloc::new();
                    self.sources
                        .parse_nickel(&alloc, file_id)?
                        .to_mainline(pos_table)
                }
            }
        } else {
            // Non-Nickel formats are deserialized directly to the runtime representation.
            self.sources.parse_other(pos_table, file_id, format)?
        };

        self.terms.insert(
            file_id,
            TermEntry {
                value: term,
                state: TermEntryState::default(),
                format,
            },
        );

        Ok(CacheOp::Done(()))
    }
889
    /// Typecheck an entry of the cache and update its state accordingly, or do nothing if the
    /// entry has already been typechecked. Requires that the corresponding source has been parsed.
    /// If the source contains imports, [Self::typecheck] recursively typechecks the imports as
    /// well.
    ///
    /// # RFC007
    ///
    /// During the transition period between the old VM and the new bytecode VM, this method
    /// performs typechecking on the new representation [crate::ast::Ast].
    pub fn typecheck(
        &mut self,
        file_id: FileId,
        initial_mode: TypecheckMode,
    ) -> Result<CacheOp<()>, AstCacheError<TypecheckError>> {
        let (slice, asts) = self.split_asts();
        asts.typecheck(slice, file_id, initial_mode)
    }
907
    /// Returns the apparent type of an entry that has been typechecked, with wildcards
    /// substituted.
    pub fn type_of(
        &mut self,
        file_id: FileId,
    ) -> Result<CacheOp<ast::typ::Type<'_>>, AstCacheError<TypecheckError>> {
        let (slice, asts) = self.split_asts();
        asts.type_of(slice, file_id)
    }
916
    /// Prepares a source for evaluation: parse, typecheck and apply program transformations, if it
    /// was not already done.
    ///
    /// Same as [Self::prepare_eval_only], but with typechecking enabled.
    pub fn prepare(
        &mut self,
        pos_table: &mut PosTable,
        file_id: FileId,
    ) -> Result<CacheOp<()>, Error> {
        self.prepare_impl(pos_table, file_id, true)
    }
926
    /// Prepares a file for evaluation only. Same as [Self::prepare], but doesn't typecheck the
    /// source.
    pub fn prepare_eval_only(
        &mut self,
        pos_table: &mut PosTable,
        file_id: FileId,
    ) -> Result<CacheOp<()>, Error> {
        self.prepare_impl(pos_table, file_id, false)
    }
936
937 /// Common implementation for [Self::prepare] and [Self::prepare_eval_only], which optionally
938 /// skips typechecking. Note that this method doesn't load and prepare the stdlib.
939 fn prepare_impl(
940 &mut self,
941 pos_table: &mut PosTable,
942 file_id: FileId,
943 typecheck: bool,
944 ) -> Result<CacheOp<()>, Error> {
945 let mut result = CacheOp::Cached(());
946
947 let format = self
948 .sources
949 .file_paths
950 .get(&file_id)
951 .and_then(Option::<InputFormat>::from)
952 .unwrap_or_default();
953
954 if let InputFormat::Nickel = format {
955 if let CacheOp::Done(_) = self.parse_to_ast(file_id)? {
956 result = CacheOp::Done(());
957 }
958
959 if typecheck {
960 let (slice, asts) = self.split_asts();
961
962 let typecheck_res = asts
963 .typecheck(slice, file_id, TypecheckMode::Walk)
964 .map_err(|cache_err| {
965 cache_err.unwrap_error(
966 "cache::prepare(): expected source to be parsed before typechecking",
967 )
968 })?;
969
970 if typecheck_res == CacheOp::Done(()) {
971 result = CacheOp::Done(());
972 };
973 }
974 }
975 // Non-Nickel terms are currently not parsed as ASTs, but directly as the runtime
976 // representation. While the imports of the main file will be parsed to terms by the
977 // `compile_and_transform` automatically, we do need to ensure that the main file is in the
978 // term cache if it's an external format, or `compile_and_transform` will complain.
979 else if let CacheOp::Done(_) = self.parse_to_term(pos_table, file_id, format)? {
980 result = CacheOp::Done(());
981 }
982
983 let transform_res =
984 self.compile_and_transform(pos_table, file_id)
985 .map_err(|cache_err| {
986 cache_err.unwrap_error(
987 "cache::prepare(): expected source to be parsed before transformations",
988 )
989 })?;
990
991 if transform_res == CacheOp::Done(()) {
992 result = CacheOp::Done(());
993 };
994
995 Ok(result)
996 }
997
998 /// Prepare an REPL snippet for evaluation: parse, typecheck and apply program transformations,
999 /// if it was not already done. The difference with [Self::prepare] is that this method also
1000 /// accept toplevel binding `let <id> = <value>`.
1001 ///
1002 /// Returns the identifier of the toplevel let, if the input is a toplevel let, or `None` if
1003 /// the input is a standard Nickel expression.
1004 pub fn prepare_repl(
1005 &mut self,
1006 pos_table: &mut PosTable,
1007 file_id: FileId,
1008 ) -> Result<CacheOp<Option<LocIdent>>, Error> {
1009 let mut done = false;
1010
1011 let parsed = self.parse_repl(pos_table, file_id)?;
1012
1013 done = done || matches!(parsed, CacheOp::Done(_));
1014
1015 let id = parsed.inner();
1016
1017 let (slice, asts) = self.split_asts();
1018 let typecheck_res = asts
1019 .typecheck(slice, file_id, TypecheckMode::Walk)
1020 .map_err(|cache_err| {
1021 cache_err.unwrap_error(
1022 "cache::prepare_repl(): expected source to be parsed before typechecking",
1023 )
1024 })?;
1025
1026 if let Some(id) = id {
1027 let (slice, asts) = self.split_asts();
1028 asts
1029 .add_type_binding(
1030 slice,
1031 id,
1032 file_id,
1033 ).expect("cache::prepare_repl(): expected source to be parsed before augmenting the type environment");
1034 }
1035
1036 done = done || matches!(typecheck_res, CacheOp::Done(_));
1037
1038 let transform_res =
1039 self.compile_and_transform(pos_table, file_id)
1040 .map_err(|cache_err| {
1041 cache_err.unwrap_error(
1042 "cache::prepare(): expected source to be parsed before transformations",
1043 )
1044 })?;
1045
1046 done = done || matches!(transform_res, CacheOp::Done(_));
1047
1048 if done {
1049 Ok(CacheOp::Done(id))
1050 } else {
1051 Ok(CacheOp::Cached(id))
1052 }
1053 }
1054
1055 /// Proxy for [TermCache::transform].
1056 fn transform(
1057 &mut self,
1058 pos_table: &mut PosTable,
1059 file_id: FileId,
1060 ) -> Result<CacheOp<()>, TermCacheError<UnboundTypeVariableError>> {
1061 self.terms
1062 .transform(pos_table, &self.wildcards, &self.import_data, file_id)
1063 }
1064
1065 /// Loads and parse the standard library in the AST cache.
1066 ///
1067 /// # RFC007
1068 ///
1069 /// This method doesn't populate the term cache. Use [Self::compile_stdlib] afterwards.
1070 pub fn load_stdlib(&mut self) -> Result<CacheOp<()>, Error> {
1071 let mut ret = CacheOp::Cached(());
1072
1073 for (_, file_id) in self.sources.stdlib_modules() {
1074 if let CacheOp::Done(_) = self.parse_to_ast(file_id)? {
1075 ret = CacheOp::Done(());
1076 }
1077 }
1078
1079 Ok(ret)
1080 }
1081
1082 /// Converts the parsed standard library to the runtime representation.
1083 pub fn compile_stdlib(
1084 &mut self,
1085 pos_table: &mut PosTable,
1086 ) -> Result<CacheOp<()>, AstCacheError<()>> {
1087 let mut ret = CacheOp::Cached(());
1088
1089 for (_, file_id) in self.sources.stdlib_modules() {
1090 let result = self.compile(pos_table, file_id).map_err(|cache_err| {
1091 if let CacheError::IncompatibleState { want } = cache_err {
1092 CacheError::IncompatibleState { want }
1093 } else {
1094 unreachable!("unexpected parse error during the compilation of stdlib")
1095 }
1096 })?;
1097
1098 if let CacheOp::Done(_) = result {
1099 ret = CacheOp::Done(());
1100 }
1101 }
1102
1103 Ok(ret)
1104 }
1105
1106 /// Typechecks the standard library. Currently only used in the test suite.
1107 pub fn typecheck_stdlib(&mut self) -> Result<CacheOp<()>, AstCacheError<TypecheckError>> {
1108 let (slice, asts) = self.split_asts();
1109 asts.typecheck_stdlib(slice)
1110 }
1111
1112 /// Loads, parses, compiles and applies program transformations to the standard library. We
1113 /// don't typecheck for performance reasons: this is done in the test suite.
1114 pub fn prepare_stdlib(&mut self, pos_table: &mut PosTable) -> Result<(), Error> {
1115 #[cfg(debug_assertions)]
1116 if self.skip_stdlib {
1117 return Ok(());
1118 }
1119
1120 self.load_stdlib()?;
1121 // unwrap(): we just loaded the stdlib, so it must be parsed in the cache.
1122 self.compile_stdlib(pos_table).unwrap();
1123
1124 self.sources
1125 .stdlib_modules()
1126 // We need to handle the internals module separately. Each field
1127 // is bound directly in the environment without evaluating it first, so we can't
1128 // tolerate top-level let bindings that would be introduced by `transform`.
1129 .try_for_each(|(_, file_id)| self.transform(pos_table, file_id).map(|_| ()))
1130 .map_err(|cache_err: TermCacheError<UnboundTypeVariableError>| {
1131 Error::ParseErrors(
1132 cache_err
1133 .unwrap_error(
1134 "cache::prepare_stdlib(): unexpected unbound type variable error during stdlib loading",
1135 )
1136 .into(),
1137 )
1138 })?;
1139
1140 Ok(())
1141 }
1142
    /// Applies a custom transform to an input and its imports. [CacheError::IncompatibleState] is returned
    /// if the file has no entry in the term cache yet (i.e. it hasn't been compiled to the
    /// runtime representation).
    ///
    /// If multiple invocations of `custom_transform` are needed, you must supply `transform_id`
    /// with a number higher than that of all previous invocations.
    pub fn custom_transform<E>(
        &mut self,
        file_id: FileId,
        transform_id: usize,
        f: &mut impl FnMut(&mut CacheHub, NickelValue) -> Result<NickelValue, E>,
    ) -> Result<(), TermCacheError<E>> {
        match self.terms.entry_state(file_id) {
            None => Err(CacheError::IncompatibleState {
                want: TermEntryState::Populated,
            }),
            Some(state) => {
                if state.needs_custom_transform(transform_id) {
                    // We temporarily remove the entry from the cache, because `f` takes a mutable
                    // borrow of the whole `CacheHub` and may look at (or transform) other
                    // entries. It's re-inserted right after with the transformed value.
                    let cached_term = self.terms.terms.remove(&file_id).unwrap();
                    let term = f(self, cached_term.value)?;
                    self.terms.insert(
                        file_id,
                        TermEntry {
                            value: term,
                            state: TermEntryState::Transforming,
                            ..cached_term
                        },
                    );

                    // Collect first: we can't hold a borrow of `self.import_data` across the
                    // recursive calls below, which borrow `self` mutably.
                    let imported: Vec<_> = self.import_data.imports(file_id).collect();
                    for file_id in imported {
                        self.custom_transform(file_id, transform_id, f)?;
                    }

                    // TODO: We're setting the state back to whatever it was.
                    // unwrap(): we inserted the term just above
                    let _ = self
                        .terms
                        .update_state(file_id, TermEntryState::CustomTransformed { transform_id })
                        .unwrap();
                }

                Ok(())
            }
        }
    }
1188
    /// Resolves every import of a term entry of the cache, and updates its state accordingly, or
    /// does nothing if the imports of the entry have already been resolved or if they aren't
    /// Nickel inputs. Requires that the corresponding source has been parsed.
    ///
    /// If resolved imports contain imports themselves, resolve them recursively. Returns a tuple
    /// of vectors, where the first component is the imports that were transitively resolved, and
    /// the second component is the errors it encountered while resolving imports in `file_id`,
    /// respectively. Imports that were already resolved before are not included in the first
    /// component: this return value is currently used by the LSP to re-run code analysis on new
    /// files/modified files.
    ///
    /// The resolved imports are ordered by a pre-order depth-first-search. In particular, earlier
    /// elements in the returned list might import later elements but -- unless there are cyclic
    /// imports -- later elements do not import earlier elements.
    ///
    /// It only accumulates errors if the cache is in error tolerant mode, otherwise it returns an
    /// `Err(..)` containing a `CacheError`.
    ///
    /// # RFC007
    ///
    /// This method is still needed only because the evaluator can't handle un-resolved imports,
    /// so we need to replace them by resolved imports. However, actual import resolution (loading
    /// and parsing files for the first time) is now driven by typechecking directly.
    pub fn resolve_imports(
        &mut self,
        pos_table: &mut PosTable,
        file_id: FileId,
    ) -> Result<CacheOp<Vec<FileId>>, TermCacheError<ImportError>> {
        let entry = self.terms.terms.get(&file_id);

        match entry {
            // A Nickel entry whose imports haven't been resolved yet.
            Some(TermEntry {
                state,
                value: term,
                format: InputFormat::Nickel,
            }) if *state < TermEntryState::ImportsResolving => {
                let term = term.clone();

                let import_resolution::strict::ResolveResult {
                    transformed_term,
                    resolved_ids: pending,
                } = import_resolution::strict::resolve_imports(pos_table, term, self)?;

                // unwrap(): the entry matched `Some(..)` at the beginning of this branch, and
                // `resolve_imports` above doesn't remove anything from `self.terms`, so the entry
                // must still be present.
                let cached_term = self.terms.terms.get_mut(&file_id).unwrap();
                cached_term.value = transformed_term;
                cached_term.state = TermEntryState::ImportsResolving;

                let mut done = Vec::new();

                // Transitively resolve the imports, and accumulate the ids of the resolved
                // files along the way.
                for id in pending {
                    if let CacheOp::Done(mut done_local) = self.resolve_imports(pos_table, id)? {
                        done.push(id);
                        done.append(&mut done_local)
                    }
                }

                // unwrap(): if we are in this branch, the term is present in the cache
                let _ = self
                    .terms
                    .update_state(file_id, TermEntryState::ImportsResolved)
                    .unwrap();

                Ok(CacheOp::Done(done))
            }
            // There's no import to resolve for non-Nickel inputs. We still update the state.
            Some(TermEntry { state, .. }) if *state < TermEntryState::ImportsResolving => {
                // unwrap(): if we are in this branch, the term is present in the cache
                let _ = self
                    .terms
                    .update_state(file_id, TermEntryState::ImportsResolved)
                    .unwrap();
                Ok(CacheOp::Cached(Vec::new()))
            }
            // [^transitory_entry_state]
            //
            // This case is triggered by a cyclic import. The entry is already
            // being treated by an ongoing call to `resolve_import` higher up in
            // the call chain, so we don't do anything here.
            //
            // Note that in some cases, this intermediate state can be observed by an
            // external caller: if a first call to `resolve_imports` fails in the middle of
            // resolving the transitive imports, the end state of the entry is
            // `ImportsResolving`. Subsequent calls to `resolve_imports` will succeed, but
            // won't change the state to `EntryState::ImportsResolved` (and for a good
            // reason: we wouldn't even know what are the pending imports to resolve). The
            // Nickel pipeline should however fail if `resolve_imports` failed at some
            // point, anyway.
            Some(TermEntry {
                state: TermEntryState::ImportsResolving,
                ..
            }) => Ok(CacheOp::Done(Vec::new())),
            // >= EntryState::ImportsResolved
            Some(_) => Ok(CacheOp::Cached(Vec::new())),
            // The source hasn't been compiled to the term cache yet.
            None => Err(CacheError::IncompatibleState {
                want: TermEntryState::Populated,
            }),
        }
    }
1292
1293 /// Generate the initial evaluation environment from the list of `file_ids` corresponding to the
1294 /// standard library parts.
1295 pub fn mk_eval_env<EC: EvalCache>(&self, eval_cache: &mut EC) -> eval::Environment {
1296 let mut eval_env = eval::Environment::new();
1297
1298 for (module, file_id) in self.sources.stdlib_modules() {
1299 // The internals module needs special treatment: it's required to be a record
1300 // literal, and its bindings are added directly to the environment
1301 if let nickel_stdlib::StdlibModule::Internals = module {
1302 let result = eval::env_add_record(
1303 eval_cache,
1304 &mut eval_env,
1305 self.terms
1306 .get_owned(file_id)
1307 .expect("cache::mk_eval_env(): can't build environment, stdlib not parsed")
1308 .into(),
1309 );
1310 if let Err(eval::EnvBuildError::NotARecord(rt)) = result {
1311 panic!(
1312 "cache::Caches::mk_eval_env(): \
1313 expected the stdlib module {} to be a record, got {:?}",
1314 self.sources.name(file_id).to_string_lossy().as_ref(),
1315 rt
1316 )
1317 }
1318 } else {
1319 eval::env_add(
1320 eval_cache,
1321 &mut eval_env,
1322 module.name().into(),
1323 self.terms.get_owned(file_id).expect(
1324 "cache::Caches::mk_eval_env(): can't build environment, stdlib not parsed",
1325 ),
1326 eval::Environment::new(),
1327 );
1328 }
1329 }
1330
1331 eval_env
1332 }
1333
1334 /// Loads a new source as a string, replacing any existing source with the same name.
1335 ///
1336 /// If there was a previous source with the same name, its `FileId` is reused and the cached
1337 /// term is deleted.
1338 ///
1339 /// Used to store intermediate short-lived generated snippets that needs to have a
1340 /// corresponding `FileId`, such as when querying or reporting errors.
1341 pub fn replace_string(&mut self, source_name: SourcePath, s: String) -> FileId {
1342 if let Some(file_id) = self.sources.id_of(&source_name) {
1343 self.sources.files.update(file_id, s);
1344 self.asts.remove(file_id);
1345 self.terms.terms.remove(&file_id);
1346 file_id
1347 } else {
1348 let file_id = self.sources.files.add(source_name.clone(), s);
1349 self.sources.file_paths.insert(file_id, source_name.clone());
1350 self.sources.file_ids.insert(
1351 source_name,
1352 NameIdEntry {
1353 id: file_id,
1354 source: SourceKind::Memory,
1355 },
1356 );
1357 file_id
1358 }
1359 }
1360
1361 pub fn closurize<EC: EvalCache>(
1362 &mut self,
1363 eval_cache: &mut EC,
1364 file_id: FileId,
1365 ) -> Result<CacheOp<()>, TermCacheError<()>> {
1366 self.terms.closurize(eval_cache, &self.import_data, file_id)
1367 }
1368
1369 /// Add the bindings of a record to the REPL type environment. Ignore fields whose name are
1370 /// defined through interpolation.
1371 pub fn add_repl_bindings(
1372 &mut self,
1373 pos_table: &PosTable,
1374 term: &NickelValue,
1375 ) -> Result<(), NotARecord> {
1376 let (slice, asts) = self.split_asts();
1377 asts.add_type_bindings(pos_table, slice, term)
1378 }
1379
    /// Converts an AST and all of its transitive dependencies to the runtime representation,
    /// populating the term cache. `file_id` and any of its Nickel dependencies must be present in
    /// the AST cache, or [CacheError::IncompatibleState] is returned. Non-Nickel dependencies,
    /// however, are instead parsed directly into the term cache.
    ///
    /// For entries that have been typechecked, the wildcard cache will be populated as well
    /// (converting from `ast::typ::Type` to the runtime representation).
    ///
    /// "Compile" is anticipating a bit on RFC007, although it is a lowering of the AST
    /// representation to the runtime representation.
    ///
    /// Compilation doesn't have a proper state associated, and thus should always be coupled with
    /// program transformations through [Self::compile_and_transform]. It should preferably not be
    /// observable as an atomic transition, although as far as I can tell, this shouldn't cause
    /// major troubles to do so.
    pub fn compile(
        &mut self,
        pos_table: &mut PosTable,
        main_id: FileId,
    ) -> Result<CacheOp<()>, AstCacheError<ImportError>> {
        // Already compiled: nothing to do.
        if self.terms.contains(main_id) {
            return Ok(CacheOp::Cached(()));
        }

        // We set the format of the main `file_id` to `Nickel`, even if it is not, to require its
        // presence in either the term cache or the ast cache.
        let mut work_stack = vec![ImportTarget {
            file_id: main_id,
            format: InputFormat::default(),
        }];

        // Depth-first traversal over the import graph, driven by `work_stack`. Already compiled
        // entries are skipped, which also guarantees termination on cyclic imports.
        while let Some(ImportTarget { file_id, format }) = work_stack.pop() {
            if self.terms.contains(file_id) {
                continue;
            }

            let entry = if let InputFormat::Nickel = format {
                // Nickel files must already be parsed in the AST cache.
                let ast_entry =
                    self.asts
                        .get_entry(file_id)
                        .ok_or(CacheError::IncompatibleState {
                            want: AstEntryState::Parsed,
                        })?;

                TermEntry {
                    value: ast_entry.ast.to_mainline(pos_table),
                    format: ast_entry.format,
                    state: TermEntryState::default(),
                }
            } else {
                // We want to maintain the same error message as before the introduction of the two
                // distinct representations, and their processing in two stages (first Nickel files that
                // have an AST, and then others before evaluation).
                //
                // If we find a non-Nickel file here that needs to be parsed, it's because it's
                // been imported from somewhere else. The error used to be an import error, which
                // includes the location of the importing expression. We thus raise an import error
                // here, in case of failure.
                //
                // NOTE(review): the position lookup below uses `main_id` as the importer, even
                // though the direct importer might be an intermediate file — confirm this is
                // intended (it falls back to a default position otherwise).
                let term = self
                    .sources
                    .parse_other(pos_table, file_id, format)
                    .map_err(|parse_err| {
                        CacheError::Error(Box::new(ImportErrorKind::ParseErrors(
                            parse_err.into(),
                            self.import_data
                                .rev_imports
                                .get(&file_id)
                                .and_then(|map| map.get(&main_id))
                                .copied()
                                .unwrap_or_default(),
                        )))
                    })?;

                TermEntry {
                    value: term,
                    format,
                    state: TermEntryState::default(),
                }
            };

            self.terms.insert(file_id, entry);
            // Lower the wildcard types gathered during typechecking, if any, to the runtime
            // representation. Entries without wildcards get an empty list.
            self.wildcards.wildcards.insert(
                file_id,
                self.asts
                    .get_wildcards(file_id)
                    .map(|ws| ws.iter())
                    .unwrap_or_default()
                    .map(|ty| ty.to_mainline(pos_table))
                    .collect(),
            );

            // Schedule the direct imports of this file for compilation.
            work_stack.extend(
                self.import_data
                    .imports
                    .get(&file_id)
                    .into_iter()
                    .flat_map(|set| set.iter()),
            )
        }

        Ok(CacheOp::Done(()))
    }
1482
1483 /// Converts an AST entry and all of its transitive dependencies to the runtime representation
1484 /// (compile), populating the term cache. Applies both import resolution and other program
1485 /// transformations on the resulting terms.
1486 pub fn compile_and_transform(
1487 &mut self,
1488 pos_table: &mut PosTable,
1489 file_id: FileId,
1490 ) -> Result<CacheOp<()>, AstCacheError<Error>> {
1491 let mut done = false;
1492
1493 done = matches!(
1494 self.compile(pos_table, file_id)
1495 .map_err(|cache_err| cache_err.map_err(Error::ImportError))?,
1496 CacheOp::Done(_)
1497 ) || done;
1498
1499 let imports = self
1500 .resolve_imports(pos_table, file_id)
1501 // force_cast(): since we compiled `file_id`, the term cache must be populated, and
1502 // thus `resolve_imports` should never throw `CacheError::IncompatibleState`.
1503 .map_err(|cache_err| cache_err.map_err(Error::ImportError).force_cast())?;
1504 done = matches!(imports, CacheOp::Done(_)) || done;
1505
1506 let transform = self
1507 .terms
1508 .transform(pos_table, &self.wildcards, &self.import_data, file_id)
1509 // force_cast(): since we compiled `file_id`, the term cache must be populated, and
1510 // thus `resolve_imports` should never throw `CacheError::IncompatibleState`.
1511 .map_err(|cache_err| {
1512 cache_err
1513 .map_err(|uvar_err| Error::ParseErrors(ParseErrors::from(uvar_err)))
1514 .force_cast()
1515 })?;
1516 done = matches!(transform, CacheOp::Done(_)) || done;
1517
1518 Ok(if done {
1519 CacheOp::Done(())
1520 } else {
1521 CacheOp::Cached(())
1522 })
1523 }
1524
1525 /// Creates a partial copy of this cache for evaluation purposes only. In particular, we don't
1526 /// copy anything related to arena-allocated ASTs. However, source files, imports data and
1527 /// terms are copied over, which is useful to make new evaluation caches cheaply, typically for
1528 /// NLS and benches.
1529 pub fn clone_for_eval(&self) -> Self {
1530 Self {
1531 terms: self.terms.clone(),
1532 sources: self.sources.clone(),
1533 asts: AstCache::empty(),
1534 wildcards: self.wildcards.clone(),
1535 import_data: self.import_data.clone(),
1536 #[cfg(debug_assertions)]
1537 skip_stdlib: self.skip_stdlib,
1538 }
1539 }
1540
1541 /// Split a mutable borrow to self into a mutable borrow of the AST cache and a mutable borrow
1542 /// of the rest.
1543 pub fn split_asts(&mut self) -> (CacheHubView<'_>, &mut AstCache) {
1544 (
1545 CacheHubView {
1546 terms: &mut self.terms,
1547 sources: &mut self.sources,
1548 wildcards: &mut self.wildcards,
1549 import_data: &mut self.import_data,
1550 #[cfg(debug_assertions)]
1551 skip_stdlib: self.skip_stdlib,
1552 },
1553 &mut self.asts,
1554 )
1555 }
1556
    /// Returns the input format of `file_id`, if known. See [SourceCache::input_format].
    pub fn input_format(&self, file_id: FileId) -> Option<InputFormat> {
        self.sources.input_format(file_id)
    }
1561}
1562
/// Because ASTs are arena-allocated, the self-referential [ast_cache::AstCache] which holds both
/// the arena and references to this arena often needs special treatment, if we want to make the
/// borrow checker happy. The following structure is basically a view of "everything but the ast
/// cache" into [CacheHub], so that we can separate and pack all the rest in a single structure,
/// making the signature of many [ast_cache::AstCache] methods much lighter.
pub struct CacheHubView<'cache> {
    /// The term (runtime representation) cache.
    terms: &'cache mut TermCache,
    /// The source file cache.
    sources: &'cache mut SourceCache,
    /// The wildcard types cache.
    wildcards: &'cache mut WildcardsCache,
    /// Imports and reverse imports data.
    import_data: &'cache mut ImportData,
    #[cfg(debug_assertions)]
    /// Skip loading the stdlib, used for debugging purposes.
    skip_stdlib: bool,
}
1577
1578impl CacheHubView<'_> {
1579 /// Make a reborrow of this slice.
1580 pub fn reborrow(&mut self) -> CacheHubView<'_> {
1581 CacheHubView {
1582 terms: self.terms,
1583 sources: self.sources,
1584 wildcards: self.wildcards,
1585 import_data: self.import_data,
1586 #[cfg(debug_assertions)]
1587 skip_stdlib: self.skip_stdlib,
1588 }
1589 }
1590}
1591
/// An entry in the term cache. Stores the parsed term together with metadata and state.
#[derive(Debug, Clone, PartialEq)]
pub struct TermEntry {
    /// The term in the runtime representation.
    pub value: NickelValue,
    /// How far this entry has progressed in the processing pipeline (import resolution,
    /// transformations, closurization).
    pub state: TermEntryState,
    /// The format the source was parsed from.
    pub format: InputFormat,
}
1599
/// An entry in the AST cache. Stores the parsed term together with metadata and state.
#[derive(Debug, Clone, PartialEq)]
pub struct AstEntry<'ast> {
    /// The arena-allocated AST.
    pub ast: &'ast Ast<'ast>,
    /// How far this entry has progressed in the processing pipeline (typechecking).
    pub state: AstEntryState,
    /// The format the source was parsed from.
    pub format: InputFormat,
}
1607
1608impl<'ast> AstEntry<'ast> {
1609 /// Creates a new entry with default metadata.
1610 pub fn new(ast: &'ast Ast<'ast>) -> Self {
1611 AstEntry {
1612 ast,
1613 state: AstEntryState::default(),
1614 format: InputFormat::default(),
1615 }
1616 }
1617}
1618
/// Inputs can be read from the filesystem or from in-memory buffers (which come, e.g., from
/// the REPL, the standard library, or the language server).
///
/// Inputs read from the filesystem get auto-refreshed: if we try to access them again and
/// the on-disk file has changed, we read it again. Inputs read from in-memory buffers
/// are not auto-refreshed. If an in-memory buffer has a path that also exists in the
/// filesystem, we will not even check that file to see if it has changed.
///
/// An input that was open as an in-memory file may be closed, namely when the file is closed
/// or deleted from an editor using the LSP. In this case, the file will be read from the
/// filesystem again instead of using the in-memory value. Closing a file only makes sense in the
/// case that the [SourcePath] refers to a path on the filesystem. Other types of in-memory files,
/// like the standard library, cannot be closed.
#[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Copy, Clone)]
enum SourceKind {
    /// Read from the filesystem. Carries the modification timestamp of the file at the time it
    /// was read, used to detect staleness.
    Filesystem(SystemTime),
    /// An in-memory buffer (REPL snippet, stdlib, LSP document, ...).
    Memory,
    /// An in-memory buffer that has been closed; subsequent reads go through the filesystem.
    MemoryClosed,
}
1638
/// The errors that can occur while closing an in-memory file.
#[derive(Debug, Clone)]
pub enum FileCloseError {
    /// The file was not closed because no mapping of the source path to a [FileId] could be
    /// found.
    FileIdNotFound,
    /// A file with the given path was found, but it was not open in memory.
    FileNotOpen,
}
1648
1649impl fmt::Display for FileCloseError {
1650 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1651 match &self {
1652 FileCloseError::FileIdNotFound => {
1653 write!(
1654 f,
1655 "No file ID could be found for the file path to be closed."
1656 )
1657 }
1658 FileCloseError::FileNotOpen => {
1659 write!(f, "Attempted to close a file that was not open in-memory.")
1660 }
1661 }
1662 }
1663}
1664
// `Display` and `Debug` are implemented for `FileCloseError`, so the default `Error` methods
// suffice.
impl std::error::Error for FileCloseError {}
1666
/// Contains information about the closed in-memory file and its replacement from the filesystem
/// in the case that an in-memory file was closed successfully.
pub struct FileCloseResult {
    /// The [FileId] of the in-memory file that was closed.
    pub closed_id: FileId,
    /// The [FileId] of the file loaded from the filesystem with the same path as the closed
    /// file, or an error indicating why the file could not be opened.
    ///
    /// An error would be expected here in the case that the file was deleted, which would
    /// also send a close file notification to the LSP.
    pub replacement_id: Result<FileId, io::Error>,
}
1678
/// Cache entries for sources.
///
/// A source can be either a snippet input by the user, in which case it is only identified by its
/// name in the name-id table, and a unique `FileId`. On the other hand, different versions of the
/// same file can coexist during the same session of the REPL. For this reason, an entry of the
/// name-id table of a file also stores the *modified at* timestamp, such that if a file is
/// imported or loaded again and has been modified in between, the entry is invalidated, the
/// content is loaded again and a new `FileId` is generated.
///
/// Note that in that case, invalidation just means that the `FileId` of a previous version is not
/// accessible anymore in the name-id table. However, terms that contain non evaluated imports or
/// source locations referring to a previous version are still able to access the corresponding
/// source or term, which are kept respectively in `files` and `cache`, by using the corresponding
/// `FileId`.
#[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Copy, Clone)]
pub struct NameIdEntry {
    /// The id of the current version of this source.
    id: FileId,
    /// Where the source was read from (filesystem or in-memory).
    source: SourceKind,
}
1697
/// The state of an entry of the term cache.
///
/// # Imports
///
/// Usually, when applying a procedure to a term entry (e.g. program transformations), we process
/// all of its transitive imports as well. We start by processing the entry, updating the state to
/// `XXXing` (ex: `Typechecking`) upon success. Only when all the imports have been successfully
/// processed, the state is updated to `XXXed` (ex: `Typechecked`).
#[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Copy, Clone, Default)]
pub enum TermEntryState {
    /// The initial state. The term is in the cache but hasn't been processed further yet.
    #[default]
    Populated,
    /// A custom transformation of the entry (through `Program::custom_transform`) is underway.
    CustomTransforming,
    /// This entry has completed custom transformations of this ID and lower.
    CustomTransformed { transform_id: usize },
    /// The imports of the entry have been resolved, and the imports of its (transitive) imports
    /// are being resolved.
    ImportsResolving,
    /// The imports of the entry and its transitive dependencies have been resolved.
    ImportsResolved,
    /// The entry has been transformed, and its (transitive) imports are being transformed.
    Transforming,
    /// The entry and its transitive imports have been transformed.
    Transformed,
    /// The entry has been closurized.
    Closurized,
}
1727
1728impl TermEntryState {
1729 fn needs_custom_transform(&self, transform_id: usize) -> bool {
1730 if let TermEntryState::CustomTransformed {
1731 transform_id: done_transform_id,
1732 } = self
1733 {
1734 transform_id > *done_transform_id
1735 } else {
1736 *self < TermEntryState::CustomTransforming
1737 }
1738 }
1739}
1740
/// The state of an entry in the AST cache. Equivalent of [TermEntryState] but for ASTs.
#[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Copy, Clone, Default)]
pub enum AstEntryState {
    /// The initial state. The AST is in the cache but hasn't been processed further yet.
    #[default]
    Parsed,
    /// The entry has been typechecked, and its (transitive) imports are being typechecked.
    Typechecking,
    /// The entry and its transitive imports have been typechecked.
    Typechecked,
}
1752
/// The result of a cache operation, such as parsing, typechecking, etc. which can either have
/// performed actual work, or have done nothing if the corresponding entry was already at a later
/// stage.
#[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Copy, Clone)]
pub enum CacheOp<T> {
    /// The operation performed actual work to produce this value.
    Done(T),
    /// The value was already available at this stage; no work was performed.
    Cached(T),
}
1761
1762impl<T> CacheOp<T> {
1763 pub fn inner(self: CacheOp<T>) -> T {
1764 match self {
1765 CacheOp::Done(t) | CacheOp::Cached(t) => t,
1766 }
1767 }
1768}
1769
/// Wrapper around other errors to indicate that typechecking or applying program transformations
/// failed because the source has not been parsed yet.
///
/// # Type parameters
///
/// - `E`: the underlying, wrapped error type
/// - `S`: the entry state, whether [TermEntryState] or [AstEntryState] in practice.
#[derive(Eq, PartialEq, Debug, Clone)]
pub enum CacheError<E, S> {
    /// An actual error from the underlying operation.
    Error(E),
    /// The state of the entry in the cache is incompatible with the requested operation.
    IncompatibleState {
        /// The minimal state the entry needs to be in for the operation to succeed.
        want: S,
    },
}
1785
/// A [CacheError] whose state parameter is the AST cache's [AstEntryState].
pub type AstCacheError<E> = CacheError<E, AstEntryState>;
/// A [CacheError] whose state parameter is the term cache's [TermEntryState].
pub type TermCacheError<E> = CacheError<E, TermEntryState>;
1788
1789impl<E, S> From<E> for CacheError<E, S> {
1790 fn from(e: E) -> Self {
1791 CacheError::Error(e)
1792 }
1793}
1794
1795impl<E, S> CacheError<E, S> {
1796 #[track_caller]
1797 pub fn unwrap_error(self, msg: &str) -> E {
1798 match self {
1799 CacheError::Error(err) => err,
1800 CacheError::IncompatibleState { .. } => panic!("{}", msg),
1801 }
1802 }
1803
1804 pub fn map_err<O>(self, f: impl FnOnce(E) -> O) -> CacheError<O, S> {
1805 match self {
1806 CacheError::Error(e) => CacheError::Error(f(e)),
1807 CacheError::IncompatibleState { want } => CacheError::IncompatibleState { want },
1808 }
1809 }
1810
1811 /// Assuming that `self` is of the form `CacheError::Error(e)`, cast the error type to another
1812 /// arbitrary state type `T`.
1813 ///
1814 /// # Panic
1815 ///
1816 /// This method panics if `self` is [CacheError::IncompatibleState].
1817 #[track_caller]
1818 pub fn force_cast<T>(self) -> CacheError<E, T> {
1819 match self {
1820 CacheError::Error(e) => CacheError::Error(e),
1821 CacheError::IncompatibleState { want: _ } => panic!(),
1822 }
1823 }
1824}
1825
/// Input data usually comes from files on the file system, but there are also lots of cases where
/// we want to synthesize other kinds of inputs.
///
/// Note that a [SourcePath] does not uniquely identify a cached input:
///
/// - Some functions (like [SourceCache::add_file]) add a new cached input unconditionally.
/// - [`SourceCache::get_or_add_file`] will add a new cached input at the same `SourcePath` if the file
///   on disk was updated.
///
/// The equality checking of `SourcePath` only affects [SourceCache::replace_string], which
/// overwrites any previous cached input with the same `SourcePath`.
#[derive(Debug, PartialEq, Eq, Hash, Clone)]
pub enum SourcePath {
    /// A file at the given path.
    ///
    /// Note that this does not need to be a real file on the filesystem: it could still be loaded
    /// from memory by, e.g., [`SourceCache::add_string`].
    ///
    /// This is the only `SourcePath` variant that can be resolved as the target of an import
    /// statement.
    Path(PathBuf, InputFormat),
    /// A subrange of a file at the given path.
    ///
    /// This is used by NLS to analyze small parts of files that don't fully parse. The original
    /// file path is preserved, because it's needed for resolving imports.
    Snippet(PathBuf),
    /// A module of the standard library.
    Std(StdlibModule),
    /// A synthesized input for a query.
    Query,
    /// The nth input typed at the REPL.
    ReplInput(usize),
    /// A synthesized input used by the REPL for typechecking.
    ReplTypecheck,
    /// A synthesized input used by the REPL for a query.
    ReplQuery,
    /// A field assignment given on the CLI.
    CliFieldAssignment,
    /// A value overriding the field at the given path.
    Override(FieldPath),
    /// A generated source, with a free-form description of its origin.
    Generated(String),
}
1861
1862impl<'a> TryFrom<&'a SourcePath> for &'a OsStr {
1863 type Error = ();
1864
1865 fn try_from(value: &'a SourcePath) -> Result<Self, Self::Error> {
1866 match value {
1867 SourcePath::Path(p, _) | SourcePath::Snippet(p) => Ok(p.as_os_str()),
1868 _ => Err(()),
1869 }
1870 }
1871}
1872
1873impl From<&SourcePath> for Option<InputFormat> {
1874 fn from(source_path: &SourcePath) -> Option<InputFormat> {
1875 if let SourcePath::Path(_p, fmt) = source_path {
1876 Some(*fmt)
1877 } else {
1878 None
1879 }
1880 }
1881}
1882
1883// [`Files`] needs to have an OsString for each file, so we synthesize names even for sources that
1884// don't have them. They don't need to be unique; they're just used for diagnostics.
1885impl From<SourcePath> for OsString {
1886 fn from(source_path: SourcePath) -> Self {
1887 match source_path {
1888 SourcePath::Path(p, _) | SourcePath::Snippet(p) => p.into(),
1889 SourcePath::Std(StdlibModule::Std) => "<stdlib/std.ncl>".into(),
1890 SourcePath::Std(StdlibModule::Internals) => "<stdlib/internals.ncl>".into(),
1891 SourcePath::Query => "<query>".into(),
1892 SourcePath::ReplInput(idx) => format!("<repl-input-{idx}>").into(),
1893 SourcePath::ReplTypecheck => "<repl-typecheck>".into(),
1894 SourcePath::ReplQuery => "<repl-query>".into(),
1895 SourcePath::CliFieldAssignment => "<cli-assignment>".into(),
1896 SourcePath::Override(path) => format!("<override {path}>",).into(),
1897 SourcePath::Generated(description) => format!("<generated {description}>").into(),
1898 }
1899 }
1900}
1901
/// Return status indicating if an import has been resolved from a file (first encounter), or was
/// retrieved from the cache.
///
/// See [ImportResolver::resolve].
#[derive(Debug, PartialEq, Eq)]
pub enum ResolvedTerm {
    /// The import was resolved for the first time by loading a file.
    FromFile {
        path: PathBuf, /* the loaded path */
    },
    /// The import had already been resolved and was served from the cache.
    FromCache,
}
1913
/// Freshness of a cached source with respect to the on-disk file it was loaded from.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum SourceState {
    /// The cached source with this id is up to date.
    UpToDate(FileId),
    /// The source is stale because it came from a file on disk that has since been updated. The
    /// data is the timestamp of the new version of the file.
    Stale(SystemTime),
}
1921
/// Abstract the access to imported files and the import cache. Used by the evaluator and at the
/// [import resolution](crate::transform::import_resolution) phase.
///
/// The standard implementation uses 2 caches, the file cache for raw contents and the term cache
/// for parsed contents, mirroring the 2 steps when resolving an import:
///
/// 1. When an import is encountered for the first time, the content of the corresponding file is
///    read and stored in the file cache (consisting of the file database plus a map between paths
///    and ids in the database, the name-id table). The content is parsed, stored in the term
///    cache, and queued somewhere so that it can undergo the standard
///    [transformations](crate::transform) (including import resolution) later.
/// 2. When it is finally processed, the term cache is updated with the transformed term.
///
/// # RFC007
///
/// Import resolution on the old representation is still needed only because of the evaluator. The
/// typechecker now uses the new AST representation with its own import resolver.
pub trait ImportResolver {
    /// Resolves an import.
    ///
    /// Reads and stores the content of an import, puts it in the file cache (or gets it from
    /// there if it is cached), then parses it and returns the corresponding term and file id.
    ///
    /// The term and the path are provided only if the import is processed for the first time.
    /// Indeed, at import resolution phase, the term of an import encountered for the first time
    /// is queued to be processed (e.g. having its own imports resolved). The path is needed to
    /// resolve nested imports relatively to this parent. Only after this processing is the term
    /// inserted back in the cache. On the other hand, if it has been resolved before, it is
    /// already transformed in the cache and does not need further processing.
    fn resolve(
        &mut self,
        pos_table: &mut PosTable,
        import: &term::Import,
        parent: Option<FileId>,
        pos_idx: PosIdx,
    ) -> Result<(ResolvedTerm, FileId), ImportError>;

    /// Return a reference to the file database.
    fn files(&self) -> &Files;

    /// Get a resolved import from the term cache.
    fn get(&self, file_id: FileId) -> Option<NickelValue>;
    /// Return the (potentially normalized) file path corresponding to the ID of a resolved import.
    fn get_path(&self, file_id: FileId) -> Option<&OsStr>;

    /// Returns the base path for Nix evaluation, which is the parent directory of the source file
    /// if any, or the current working directory, or an empty path if we couldn't determine any of
    /// the previous two.
    ///
    /// This method needs to be here because the evaluator makes use of it (when evaluating the
    /// `eval_nix` primop), but at this stage it only has access to the `ImportResolver` interface.
    /// We could give a default implementation here just using [Self::get_path], but we also need
    /// `get_base_dir_for_nix` in [SourceCache]. We reuse the latter implementation instead of
    /// duplicating a more generic variant here.
    #[cfg(feature = "nix-experimental")]
    fn get_base_dir_for_nix(&self, file_id: FileId) -> PathBuf;
}
1979
impl ImportResolver for CacheHub {
    fn resolve(
        &mut self,
        pos_table: &mut PosTable,
        import: &term::Import,
        parent: Option<FileId>,
        pos_idx: PosIdx,
    ) -> Result<(ResolvedTerm, FileId), ImportError> {
        let pos = pos_table.get(pos_idx);

        // Determine the candidate directories to search, the (relative) path to look up, the
        // package root (for package imports) and the input format of the target.
        let (possible_parents, path, pkg_id, format) = match import {
            term::Import::Path { path, format } => {
                // `parent` is the file that did the import. We first look in its containing directory, followed by
                // the directories in the import path.
                let mut parent_path = parent
                    .and_then(|p| self.get_path(p))
                    .map(PathBuf::from)
                    .unwrap_or_default();
                // Drop the file name to keep only the containing directory.
                parent_path.pop();

                (
                    std::iter::once(parent_path)
                        .chain(self.sources.import_paths.iter().cloned())
                        .collect(),
                    Path::new(path),
                    None,
                    *format,
                )
            }
            term::Import::Package { id } => {
                let package_map = self
                    .sources
                    .package_map
                    .as_ref()
                    .ok_or(ImportErrorKind::NoPackageMap { pos })?;
                let parent_path = parent
                    .and_then(|p| self.sources.packages.get(&p))
                    .map(PathBuf::as_path);
                let pkg_path = package_map.get(parent_path, *id, pos)?;
                (
                    vec![pkg_path.to_owned()],
                    Path::new("main.ncl"),
                    Some(pkg_path.to_owned()),
                    // Packages are always in nickel format
                    InputFormat::Nickel,
                )
            }
        };

        // Try to import from all possibilities, taking the first one that succeeds.
        let (id_op, path_buf) = possible_parents
            .iter()
            .find_map(|parent| {
                let mut path_buf = parent.clone();
                path_buf.push(path);
                self.sources
                    .get_or_add_file(&path_buf, format)
                    .ok()
                    .map(|x| (x, path_buf))
            })
            .ok_or_else(|| {
                // No candidate directory contained the file: report all the places we looked in.
                let parents = possible_parents
                    .iter()
                    .map(|p| p.to_string_lossy())
                    .collect::<Vec<_>>();
                ImportErrorKind::IOError(
                    path.to_string_lossy().into_owned(),
                    format!("could not find import (looked in [{}])", parents.join(", ")),
                    pos,
                )
            })?;

        let (result, file_id) = match id_op {
            CacheOp::Cached(id) => (ResolvedTerm::FromCache, id),
            CacheOp::Done(id) => (ResolvedTerm::FromFile { path: path_buf }, id),
        };

        // Record the dependency edge in both directions (imports and reverse imports).
        if let Some(parent) = parent {
            self.import_data
                .imports
                .entry(parent)
                .or_default()
                .insert(ImportTarget { file_id, format });
            self.import_data
                .rev_imports
                .entry(file_id)
                .or_default()
                .entry(parent)
                .or_insert(pos);
        }

        // Make sure the resolved file is parsed into the term cache; parse failures are
        // surfaced as import errors at the import position.
        self.parse_to_term(pos_table, file_id, format)
            .map_err(|err| ImportErrorKind::ParseErrors(err, pos))?;

        // Remember the package root of this file so that its own package imports resolve
        // relative to it.
        if let Some(pkg_id) = pkg_id {
            self.sources.packages.insert(file_id, pkg_id);
        }

        Ok((result, file_id))
    }

    fn files(&self) -> &Files {
        &self.sources.files
    }

    fn get(&self, file_id: FileId) -> Option<NickelValue> {
        self.terms
            .terms
            .get(&file_id)
            .map(|TermEntry { value, .. }| value.clone())
    }

    fn get_path(&self, file_id: FileId) -> Option<&OsStr> {
        // Only file-backed source paths convert to an `OsStr`; others yield `None`.
        self.sources
            .file_paths
            .get(&file_id)
            .and_then(|p| p.try_into().ok())
    }

    #[cfg(feature = "nix-experimental")]
    fn get_base_dir_for_nix(&self, file_id: FileId) -> PathBuf {
        self.sources.get_base_dir_for_nix(file_id)
    }
}
2104
/// Import resolution for new AST representation (RFC007).
pub trait AstImportResolver {
    /// Resolves an import to an AST.
    ///
    /// Reads and stores the content of an import, puts it in the file cache (or gets it from there
    /// if it is cached), then parses it and returns the corresponding term and file id.
    ///
    /// The term and the path are provided only if the import is processed for the first time.
    /// Indeed, at import resolution phase, the term of an import encountered for the first time is
    /// queued to be processed (e.g. having its own imports resolved). The path is needed to
    /// resolve nested imports relatively to this parent. Only after this processing is the term
    /// inserted back in the cache. On the other hand, if it has been resolved before, it is
    /// already transformed in the cache and does not need further processing.
    ///
    /// # Returns
    ///
    /// [Self::resolve] returns `Ok(None)` if the import is an external format, which can currently
    /// be serialized directly to the runtime representation ([NickelValue]) without going through
    /// an AST. AST import resolution is mostly used by the typechecker, and the typechecker
    /// currently ignores external formats anyway.
    ///
    /// # Lifetimes
    ///
    /// The signature is parametrized by two different lifetimes. This is due mostly to NLS: in the
    /// normal Nickel pipeline, all the ASTs are currently allocated in the same arena, and their
    /// lifetime is the same. However, in NLS, each file needs to be managed separately. At the
    /// import boundary, we're thus not guaranteed to get an AST that lives as long as the one
    /// being currently typechecked.
    fn resolve<'ast_out>(
        &'ast_out mut self,
        import: &ast::Import<'_>,
        pos: &TermPos,
    ) -> Result<Option<&'ast_out Ast<'ast_out>>, ImportErrorKind>;
}
2139
2140/// Normalize the path of a file for unique identification in the cache.
2141///
2142/// The returned path will be an absolute path.
2143pub fn normalize_path(path: impl Into<PathBuf>) -> std::io::Result<PathBuf> {
2144 let mut path = path.into();
2145 if path.is_relative() {
2146 path = std::env::current_dir()?.join(path);
2147 }
2148 Ok(normalize_abs_path(&path))
2149}
2150
/// Normalize the path (assumed to be absolute) of a file for unique identification in the cache.
///
/// This implementation (including the comment below) was taken from cargo-util.
///
/// CAUTION: This does not resolve symlinks (unlike [`std::fs::canonicalize`]). This may cause
/// incorrect or surprising behavior at times. This should be used carefully. Unfortunately,
/// [`std::fs::canonicalize`] can be hard to use correctly, since it can often fail, or on Windows
/// returns annoying device paths. This is a problem Cargo needs to improve on.
pub fn normalize_abs_path(path: &Path) -> PathBuf {
    use std::path::Component;

    let mut components = path.components().peekable();

    // A leading Windows prefix (drive letter, UNC prefix) is kept verbatim at the front.
    let mut normalized = match components.peek().cloned() {
        Some(prefix @ Component::Prefix(..)) => {
            components.next();
            PathBuf::from(prefix.as_os_str())
        }
        _ => PathBuf::new(),
    };

    for component in components {
        match component {
            // The prefix, if any, was consumed above.
            Component::Prefix(..) => unreachable!(),
            Component::RootDir => normalized.push(component.as_os_str()),
            // `.` segments are simply dropped.
            Component::CurDir => (),
            // `..` removes the last kept segment; popping past the root is a no-op.
            Component::ParentDir => {
                normalized.pop();
            }
            Component::Normal(segment) => normalized.push(segment),
        }
    }

    normalized
}
2187
/// Normalize a relative path, removing mid-path `..`s.
///
/// Like [`normalize_abs_path`], this works only on the path itself (i.e. not the filesystem) and
/// does not follow symlinks.
pub fn normalize_rel_path(path: &Path) -> PathBuf {
    use std::path::Component;

    let mut components = path.components().peekable();

    // `..` components that couldn't be cancelled against a preceding segment; they are
    // prepended to the result at the end.
    let mut leading_parents = PathBuf::new();

    let mut normalized = match components.peek().cloned() {
        Some(prefix @ Component::Prefix(..)) => {
            components.next();
            PathBuf::from(prefix.as_os_str())
        }
        _ => PathBuf::new(),
    };

    for component in components {
        match component {
            // The prefix, if any, was consumed above.
            Component::Prefix(..) => unreachable!(),
            Component::RootDir => normalized.push(component.as_os_str()),
            // `.` segments are simply dropped.
            Component::CurDir => (),
            Component::ParentDir => {
                // A `..` cancels the previous segment when there is one; otherwise it must be
                // kept as a leading `..`.
                if !normalized.pop() {
                    leading_parents.push(Component::ParentDir);
                }
            }
            Component::Normal(segment) => normalized.push(segment),
        }
    }

    leading_parents.extend(normalized.components());
    leading_parents
}
2224
/// Returns the last-modification timestamp of a file, or an `Err` if an IO error occurred (e.g.
/// the file doesn't exist or its metadata isn't readable).
pub fn timestamp(path: impl AsRef<OsStr>) -> io::Result<SystemTime> {
    fs::metadata(path.as_ref())?.modified()
}
2229
/// As RFC007 is being rolled out, the typechecker now needs to operate on the new AST. We need a
/// structure that implements [AstImportResolver].
///
/// For borrowing reasons, this can't be all of [CacheHub] or all of [ast_cache::AstCache], as we
/// need to split the different things that are borrowed mutably or immutably. `AstResolver` is a
/// structure that borrows some parts of the cache during its lifetime and will retrieve already
/// imported ASTs, or register the newly imported ones in a separate hashmap that can be added
/// back to the original cache once import resolution is done.
pub struct AstResolver<'ast, 'cache> {
    /// The AST allocator used to parse new sources.
    alloc: &'ast AstAlloc,
    /// The AST cache, which is added to as import resolution progresses.
    asts: &'cache mut HashMap<FileId, AstEntry<'ast>>,
    /// The source cache where new sources will be stored.
    sources: &'cache mut SourceCache,
    /// Direct and reverse dependencies of files (with respect to imports).
    import_data: &'cache mut ImportData,
}
2248
2249impl<'ast, 'cache> AstResolver<'ast, 'cache> {
2250 /// Create a new `AstResolver` from an allocator, an ast cache and a cache hub slice.
2251 pub fn new(
2252 alloc: &'ast AstAlloc,
2253 asts: &'cache mut HashMap<FileId, AstEntry<'ast>>,
2254 slice: CacheHubView<'cache>,
2255 ) -> Self {
2256 Self {
2257 alloc,
2258 asts,
2259 sources: slice.sources,
2260 import_data: slice.import_data,
2261 }
2262 }
2263}
2264
impl AstImportResolver for AstResolver<'_, '_> {
    fn resolve(
        &mut self,
        import: &ast::Import<'_>,
        pos: &TermPos,
    ) -> Result<Option<&Ast<'_>>, ImportErrorKind> {
        // The importing file, if any, is recovered from the position of the import.
        let parent_id = pos.src_id();

        // Determine the candidate directories to search, the (relative) path to look up, the
        // package root (for package imports) and the input format of the target.
        let (possible_parents, path, pkg_id, format) = match import {
            ast::Import::Path { path, format } => {
                // `parent` is the file that did the import. We first look in its containing
                // directory, followed by the directories in the import path.
                let parent_path = parent_id
                    .and_then(|parent| self.sources.file_paths.get(&parent))
                    .and_then(|path| <&OsStr>::try_from(path).ok())
                    .map(PathBuf::from)
                    .map(|mut path| {
                        path.pop();
                        path
                    })
                    // If the parent isn't a proper file, we look in the current directory instead.
                    // This is useful when importing e.g. from the REPL or the CLI directly.
                    .unwrap_or_default();

                (
                    std::iter::once(parent_path)
                        .chain(self.sources.import_paths.iter().cloned())
                        .collect(),
                    Path::new(path),
                    None,
                    *format,
                )
            }
            ast::Import::Package { id } => {
                let package_map = self
                    .sources
                    .package_map
                    .as_ref()
                    .ok_or(ImportErrorKind::NoPackageMap { pos: *pos })?;
                let parent_path = parent_id
                    .and_then(|p| self.sources.packages.get(&p))
                    .map(PathBuf::as_path);
                let pkg_path = package_map.get(parent_path, *id, *pos)?;
                (
                    vec![pkg_path.to_owned()],
                    Path::new("main.ncl"),
                    Some(pkg_path.to_owned()),
                    // Packages are always in nickel format
                    InputFormat::Nickel,
                )
            }
        };

        // Try to import from all possibilities, taking the first one that succeeds.
        let id_op = possible_parents
            .iter()
            .find_map(|parent| {
                let mut path_buf = parent.clone();
                path_buf.push(path);
                self.sources.get_or_add_file(&path_buf, format).ok()
            })
            .ok_or_else(|| {
                // No candidate directory contained the file: report all the places we looked in.
                let parents = possible_parents
                    .iter()
                    .map(|p| p.to_string_lossy())
                    .collect::<Vec<_>>();
                ImportErrorKind::IOError(
                    path.to_string_lossy().into_owned(),
                    format!("could not find import (looked in [{}])", parents.join(", ")),
                    *pos,
                )
            })?;

        let file_id = id_op.inner();

        // Record the dependency edge in both directions (imports and reverse imports).
        if let Some(parent_id) = parent_id {
            self.import_data
                .imports
                .entry(parent_id)
                .or_default()
                .insert(ImportTarget { file_id, format });
            self.import_data
                .rev_imports
                .entry(file_id)
                .or_default()
                .entry(parent_id)
                .or_insert(*pos);
        }

        // Remember the package root of this file so that its own package imports resolve
        // relative to it.
        if let Some(pkg_id) = pkg_id {
            self.sources.packages.insert(file_id, pkg_id);
        }

        if let InputFormat::Nickel = format {
            if let Some(entry) = self.asts.get(&file_id) {
                Ok(Some(entry.ast))
            } else {
                // First encounter: parse the source and move the AST into the arena so that it
                // can be cached and returned by reference.
                let ast = parse_nickel(self.alloc, file_id, self.sources.files.source(file_id))
                    .map_err(|parse_err| ImportErrorKind::ParseErrors(parse_err, *pos))?;
                let ast = self.alloc.alloc(ast);
                self.asts.insert(file_id, AstEntry::new(ast));

                Ok(Some(ast))
            }
        } else {
            // Currently, non-Nickel files are just ignored during the AST phase. They are parsed
            // later directly into the runtime representation.
            Ok(None)
        }
    }
}
2376
/// Provide mockup import resolvers for testing purpose.
pub mod resolvers {
    use super::*;
    use crate::term::Import;

    /// A dummy resolver that panics when asked to do something. Used to test code that contains no
    /// import.
    pub struct DummyResolver {}

    impl ImportResolver for DummyResolver {
        fn resolve(
            &mut self,
            _pos_table: &mut PosTable,
            _import: &Import,
            _parent: Option<FileId>,
            _pos_idx: PosIdx,
        ) -> Result<(ResolvedTerm, FileId), ImportError> {
            panic!("cache::resolvers: dummy resolver should not have been invoked");
        }

        fn files(&self) -> &Files {
            panic!("cache::resolvers: dummy resolver should not have been invoked");
        }

        fn get(&self, _file_id: FileId) -> Option<NickelValue> {
            panic!("cache::resolvers: dummy resolver should not have been invoked");
        }

        fn get_path(&self, _file_id: FileId) -> Option<&OsStr> {
            panic!("cache::resolvers: dummy resolver should not have been invoked");
        }

        #[cfg(feature = "nix-experimental")]
        fn get_base_dir_for_nix(&self, _file_id: FileId) -> PathBuf {
            panic!("cache::resolvers: dummy resolver should not have been invoked");
        }
    }

    /// Resolve imports from a mockup file database. Used to test imports without accessing the
    /// file system. File names are stored as strings, and silently converted from/to `OsString`
    /// when needed: don't use this resolver with source code that imports non UTF-8 paths.
    #[derive(Clone, Default)]
    pub struct SimpleResolver {
        // In-memory file database standing in for the real filesystem.
        files: Files,
        // Maps a mockup file name to its id in `files`.
        file_cache: HashMap<String, FileId>,
        // Parsed contents of already resolved imports.
        term_cache: HashMap<FileId, NickelValue>,
    }

    impl SimpleResolver {
        pub fn new() -> SimpleResolver {
            SimpleResolver::default()
        }

        /// Add a mockup file to available imports.
        pub fn add_source(&mut self, name: String, source: String) {
            let id = self.files.add(name.clone(), source);
            self.file_cache.insert(name, id);
        }
    }

    impl ImportResolver for SimpleResolver {
        fn resolve(
            &mut self,
            pos_table: &mut PosTable,
            import: &Import,
            _parent: Option<FileId>,
            pos_idx: PosIdx,
        ) -> Result<(ResolvedTerm, FileId), ImportError> {
            // The mockup resolver only supports path imports, not package imports.
            let Import::Path { path, .. } = import else {
                panic!("simple resolver doesn't support packages");
            };

            let pos = pos_table.get(pos_idx);

            let file_id = self
                .file_cache
                .get(path.to_string_lossy().as_ref())
                .copied()
                .ok_or_else(|| {
                    ImportErrorKind::IOError(
                        path.to_string_lossy().into_owned(),
                        String::from("Import not found by the mockup resolver."),
                        pos,
                    )
                })?;

            // Parse and cache the term on the first resolution; afterwards, serve it from the
            // cache.
            if let hash_map::Entry::Vacant(e) = self.term_cache.entry(file_id) {
                let buf = self.files.source(file_id);
                let alloc = AstAlloc::new();

                let ast = parser::grammar::TermParser::new()
                    .parse_strict(&alloc, file_id, Lexer::new(buf))
                    .map_err(|e| ImportErrorKind::ParseErrors(e, pos))?;
                e.insert(ast.to_mainline(pos_table));

                Ok((
                    ResolvedTerm::FromFile {
                        path: PathBuf::new(),
                    },
                    file_id,
                ))
            } else {
                Ok((ResolvedTerm::FromCache, file_id))
            }
        }

        fn files(&self) -> &Files {
            &self.files
        }

        fn get(&self, file_id: FileId) -> Option<NickelValue> {
            self.term_cache.get(&file_id).cloned()
        }

        fn get_path(&self, file_id: FileId) -> Option<&OsStr> {
            Some(self.files.name(file_id))
        }

        #[cfg(feature = "nix-experimental")]
        fn get_base_dir_for_nix(&self, file_id: FileId) -> PathBuf {
            self.get_path(file_id)
                .and_then(|path| Path::new(path).parent())
                .map(PathBuf::from)
                .unwrap_or_default()
        }
    }
}
2504
2505/// Parses a Nickel expression from a string.
2506fn parse_nickel<'ast>(
2507 alloc: &'ast AstAlloc,
2508 file_id: FileId,
2509 source: &str,
2510) -> Result<Ast<'ast>, ParseErrors> {
2511 let ast = measure_runtime!(
2512 "runtime:parse:nickel",
2513 parser::grammar::TermParser::new().parse_strict(alloc, file_id, Lexer::new(source))?
2514 );
2515
2516 Ok(ast)
2517}
2518
2519// Parse a Nickel REPL input. In addition to normal Nickel expressions, it can be a top-level let.
2520fn parse_nickel_repl<'ast>(
2521 alloc: &'ast AstAlloc,
2522 file_id: FileId,
2523 source: &str,
2524) -> Result<ExtendedTerm<Ast<'ast>>, ParseErrors> {
2525 let et = measure_runtime!(
2526 "runtime:parse:nickel",
2527 parser::grammar::ExtendedTermParser::new().parse_strict(
2528 alloc,
2529 file_id,
2530 Lexer::new(source)
2531 )?
2532 );
2533
2534 Ok(et)
2535}
2536
2537/// AST cache (for the new [crate::ast::Ast]) that holds the owned allocator of the AST
2538/// nodes.
2539mod ast_cache {
2540 use super::*;
2541 use crate::traverse::TraverseAlloc as _;
2542
    /// The AST cache packing together the AST allocator and the cached ASTs.
    //
    // `#[self_referencing]` (ouroboros) generates the constructor and the accessors used below
    // (`new`, `borrow_*`, `with_*`, `with_mut`): the `asts`, `type_ctxt` and `wildcards` fields
    // all borrow from the sibling `alloc` field.
    #[self_referencing]
    pub struct AstCache {
        /// The allocator hosting AST nodes.
        alloc: AstAlloc,
        /// An AST for each file we have cached.
        #[borrows(alloc)]
        #[covariant]
        asts: HashMap<FileId, AstEntry<'this>>,
        /// The initial typing context. It's morally an option (uninitialized at first), but we
        /// just use an empty context as a default value.
        ///
        /// This context can be augmented through [AstCache::add_repl_binding] and
        /// [AstCache::add_repl_bindings], which is typically used in the REPL to add top-level
        /// bindings.
        #[borrows(alloc)]
        #[not_covariant]
        type_ctxt: typecheck::Context<'this>,
        /// Mapping of each wildcard id to its inferred type, for each file in the cache. This is
        /// the same as [super::WildcardsCache], but in the new AST representation. It is later on
        /// transformed to the runtime representation to populate the wildcard cache.
        #[borrows(alloc)]
        #[covariant]
        wildcards: HashMap<FileId, typecheck::Wildcards<'this>>,
    }
2568
2569 impl AstCache {
        /// Construct a new, empty, AST cache.
        pub fn empty() -> Self {
            // `AstCache::new` is generated by `#[self_referencing]`: each closure builds a field
            // borrowing from the `alloc` passed as the first argument.
            AstCache::new(
                AstAlloc::new(),
                |_alloc| HashMap::new(),
                |_alloc| typecheck::Context::new(),
                |_alloc| HashMap::new(),
            )
        }
2579
        /// Clears the allocator and the cached ASTs.
        ///
        /// Replacing `self` with a fresh cache drops the previous self-referencing struct, which
        /// releases the arena together with everything allocated in it.
        pub fn clear(&mut self) {
            *self = Self::empty();
        }
2584
2585 /// Returns `true` if the AST cache contains an entry for the given file id.
2586 pub fn contains(&self, file_id: FileId) -> bool {
2587 self.borrow_asts().contains_key(&file_id)
2588 }
2589
        /// Returns the underlying allocator, which might be required to call various helpers.
        pub fn get_alloc(&self) -> &AstAlloc {
            // `borrow_alloc` is an accessor generated by `#[self_referencing]`.
            self.borrow_alloc()
        }
2594
2595 /// Returns a reference to a cached AST.
2596 pub fn get(&self, file_id: FileId) -> Option<&Ast<'_>> {
2597 self.borrow_asts().get(&file_id).map(|entry| entry.ast)
2598 }
2599
        /// Returns a reference to a cached AST entry, that is the AST together with its
        /// processing state.
        pub fn get_entry(&self, file_id: FileId) -> Option<&AstEntry<'_>> {
            self.borrow_asts().get(&file_id)
        }
2604
        /// Returns the wildcards associated to an entry, which are populated by
        /// [Self::typecheck]. Returns `None` if the entry has no wildcards recorded.
        pub fn get_wildcards(&self, file_id: FileId) -> Option<&typecheck::Wildcards<'_>> {
            self.borrow_wildcards().get(&file_id)
        }
2609
2610 /// Retrieves the state of an entry. Returns `None` if the entry is not in the AST cache.
2611 pub fn entry_state(&self, file_id: FileId) -> Option<AstEntryState> {
2612 self.borrow_asts()
2613 .get(&file_id)
2614 .map(|AstEntry { state, .. }| *state)
2615 }
2616
2617 /// Updates the state of an entry and returns the previous state, or an error if the entry
2618 /// isn't in the cache.
2619 pub fn update_state(
2620 &mut self,
2621 file_id: FileId,
2622 new: AstEntryState,
2623 ) -> Result<AstEntryState, TermNotFound> {
2624 self.with_asts_mut(|asts| {
2625 asts.get_mut(&file_id)
2626 .map(|AstEntry { state, .. }| std::mem::replace(state, new))
2627 })
2628 .ok_or(TermNotFound)
2629 }
2630
        /// Parses a Nickel expression and stores the corresponding AST in the cache.
        pub fn parse_nickel<'ast>(
            &'ast mut self,
            file_id: FileId,
            source: &str,
        ) -> Result<&'ast Ast<'ast>, ParseErrors> {
            // `with_mut` (generated by ouroboros) gives simultaneous access to the arena and the
            // AST table of the self-referencing struct.
            self.with_mut(|slf| {
                let ast = parse_nickel(slf.alloc, file_id, source)?;
                // Move the parsed AST into the arena, so that it lives as long as the cache.
                let ast = slf.alloc.alloc(ast);
                slf.asts.insert(file_id, AstEntry::new(ast));

                Ok(ast)
            })
        }
2645
        /// Same as [Self::parse_nickel] but accepts the extended syntax allowed in the REPL.
        ///
        /// **Caution**: this method doesn't cache the potential id of a top-level let binding,
        /// although it does save the bound expression, which is required later for typechecking,
        /// program transformation, etc.
        pub fn parse_nickel_repl<'ast>(
            &'ast mut self,
            file_id: FileId,
            source: &str,
        ) -> Result<ExtendedTerm<Ast<'ast>>, ParseErrors> {
            self.with_mut(|slf| {
                let extd_ast = parse_nickel_repl(slf.alloc, file_id, source)?;

                // In both cases, only the (bound) expression is moved to the arena and cached;
                // a top-level let identifier is not stored (see the caution note above).
                let ast = match &extd_ast {
                    ExtendedTerm::Term(t) | ExtendedTerm::ToplevelLet(_, t) => {
                        slf.alloc.alloc(t.clone())
                    }
                };

                slf.asts.insert(file_id, AstEntry::new(ast));

                Ok(extd_ast)
            })
        }
2670
        /// Removes an entry from the AST cache and returns it, if it was present.
        pub fn remove(&mut self, file_id: FileId) -> Option<AstEntry<'_>> {
            self.with_asts_mut(|asts| asts.remove(&file_id))
        }
2674
        /// Typechecks an entry of the cache and updates its state accordingly, or does nothing if
        /// the entry has already been typechecked. Requires that the corresponding source has
        /// been parsed. Note that this method currently fails on a non-Nickel file, that can't
        /// have been parsed to an AST.
        ///
        /// If the source contains imports, recursively typecheck on the imports too.
        ///
        /// # RFC007
        ///
        /// During the transition period between the old VM and the new bytecode VM, this method
        /// performs typechecking on the new representation [crate::ast::Ast].
        pub fn typecheck(
            &mut self,
            mut slice: CacheHubView<'_>,
            file_id: FileId,
            initial_mode: TypecheckMode,
        ) -> Result<CacheOp<()>, AstCacheError<TypecheckError>> {
            // The entry must at least be parsed before we can typecheck it.
            let Some(state) = self.entry_state(file_id) else {
                return Err(CacheError::IncompatibleState {
                    want: AstEntryState::Parsed,
                });
            };

            // If we're already typechecking or we have typechecked the file, we stop right here.
            if state >= AstEntryState::Typechecking {
                return Ok(CacheOp::Cached(()));
            }

            // Protect against cycles in the import graph.
            // unwrap(): we checked at the beginning of this function that the term is in the
            // cache.
            let _ = self
                .update_state(file_id, AstEntryState::Typechecking)
                .unwrap();

            // Ensure the initial typing context is properly initialized.
            self.populate_type_ctxt(slice.sources);
            self.with_mut(|slf| -> Result<(), AstCacheError<TypecheckError>> {
                // unwrap(): we checked at the beginning of this function that the AST cache has an
                // entry for `file_id`.
                let ast = slf.asts.get(&file_id).unwrap().ast;

                // The resolver borrows the AST table mutably: newly imported ASTs are inserted
                // there as typechecking discovers them.
                let mut resolver = AstResolver::new(slf.alloc, slf.asts, slice.reborrow());
                let type_ctxt = slf.type_ctxt.clone();
                let wildcards_map = measure_runtime!(
                    "runtime:type_check",
                    typecheck(slf.alloc, ast, type_ctxt, &mut resolver, initial_mode)?
                );
                slf.wildcards.insert(file_id, wildcards_map);

                Ok(())
            })?;

            // Typecheck dependencies (files imported by this file).
            if let Some(imports) = slice.import_data.imports.get(&file_id) {
                // Because we need to borrow `import_data` for typechecking, we need to release the
                // borrow by moving the content of `imports` somewhere else.
                //
                // We ignore non-Nickel imports, which aren't typechecked, and are currently not
                // even in the AST cache.
                let imports: Vec<_> = imports
                    .iter()
                    .filter_map(|tgt| {
                        if let InputFormat::Nickel = tgt.format {
                            Some(tgt.file_id)
                        } else {
                            None
                        }
                    })
                    .collect();

                for file_id in imports {
                    self.typecheck(slice.reborrow(), file_id, initial_mode)?;
                }
            }

            // unwrap(): we checked at the beginning of this function that the AST is in the
            // cache.
            let _ = self
                .update_state(file_id, AstEntryState::Typechecked)
                .unwrap();

            Ok(CacheOp::Done(()))
        }
2759
2760 /// Typechecks the stdlib. This has to be public because it's used in benches. It probably
2761 /// does not have to be used for something else.
2762 pub fn typecheck_stdlib(
2763 &mut self,
2764 mut slice: CacheHubView<'_>,
2765 ) -> Result<CacheOp<()>, AstCacheError<TypecheckError>> {
2766 let mut ret = CacheOp::Cached(());
2767 self.populate_type_ctxt(slice.sources);
2768
2769 for (_, stdlib_module_id) in slice.sources.stdlib_modules() {
2770 let result =
2771 self.typecheck(slice.reborrow(), stdlib_module_id, TypecheckMode::Walk)?;
2772
2773 if let CacheOp::Done(()) = result {
2774 ret = CacheOp::Done(());
2775 }
2776 }
2777
2778 Ok(ret)
2779 }
2780
    /// Typechecks a file (if it wasn't already) and returns the inferred type, with type
    /// wildcards properly substituted.
    pub fn type_of(
        &mut self,
        mut slice: CacheHubView<'_>,
        file_id: FileId,
    ) -> Result<CacheOp<ast::typ::Type<'_>>, AstCacheError<TypecheckError>> {
        // Make sure the file (and its imports) have been typechecked, so that the wildcards
        // table queried below is populated.
        self.typecheck(slice.reborrow(), file_id, TypecheckMode::Walk)?;

        self.with_mut(|slf| {
            let ast = slf
                .asts
                .get(&file_id)
                .ok_or(CacheError::IncompatibleState {
                    want: AstEntryState::Parsed,
                })?
                .ast;

            let mut resolver = AstResolver::new(slf.alloc, slf.asts, slice.reborrow());
            let type_ctxt = slf.type_ctxt.clone();

            let typ: Result<ast::typ::Type<'_>, _> = TryConvert::try_convert(
                slf.alloc,
                ast.apparent_type(slf.alloc, Some(&type_ctxt.type_env), Some(&mut resolver)),
            );

            // If the apparent type can't be converted to a proper type, fall back to `Dyn`.
            let typ = typ.unwrap_or(ast::typ::TypeF::Dyn.into());

            // unwrap(): we ensured that the file is typechecked, thus its wildcards and its AST
            // must be populated
            let wildcards = slf.wildcards.get(&file_id).unwrap();

            // Substitute each wildcard by its inferred type (or `Dyn` when the wildcard was
            // never resolved). The traversal is infallible, hence the final unwrap().
            Ok(CacheOp::Done(
                typ.traverse(
                    slf.alloc,
                    &mut |ty: ast::typ::Type| -> Result<_, std::convert::Infallible> {
                        if let ast::typ::TypeF::Wildcard(id) = ty.typ {
                            Ok(wildcards
                                .get(id)
                                .cloned()
                                .unwrap_or(ast::typ::Type::from(ast::typ::TypeF::Dyn)))
                        } else {
                            Ok(ty)
                        }
                    },
                    TraverseOrder::TopDown,
                )
                .unwrap(),
            ))
        })
    }
2832
2833 /// If the type context hasn't been created yet, generate and cache the initial typing
2834 /// context from the list of `file_ids` corresponding to the standard library parts.
2835 /// Otherwise, do nothing.
2836 fn populate_type_ctxt(&mut self, sources: &SourceCache) {
2837 self.with_mut(|slf| {
2838 if !slf.type_ctxt.is_empty() {
2839 return;
2840 }
2841 let stdlib_terms_vec: Vec<(StdlibModule, &'_ Ast<'_>)> = sources
2842 .stdlib_modules()
2843 .map(|(module, file_id)| {
2844 let ast = slf.asts.get(&file_id).map(|entry| entry.ast);
2845
2846 (
2847 module,
2848 ast.expect("cache::ast_cache::AstCache::populate_type_ctxt(): can't build environment, stdlib not parsed")
2849 )
2850 })
2851 .collect();
2852
2853 *slf.type_ctxt = typecheck::mk_initial_ctxt(slf.alloc, stdlib_terms_vec).unwrap();
2854 });
2855 }
2856
2857 /// Adds a binding to the type environment. The bound term is identified by its file id
2858 /// `file_id`.
2859 pub fn add_type_binding(
2860 &mut self,
2861 mut slice: CacheHubView<'_>,
2862 id: LocIdent,
2863 file_id: FileId,
2864 ) -> Result<(), AstCacheError<std::convert::Infallible>> {
2865 self.with_mut(|slf| {
2866 let Some(entry) = slf.asts.get(&file_id) else {
2867 return Err(CacheError::IncompatibleState {
2868 want: AstEntryState::Parsed,
2869 });
2870 };
2871
2872 let ast = entry.ast;
2873 let mut resolver = AstResolver::new(slf.alloc, slf.asts, slice.reborrow());
2874
2875 typecheck::env_add(
2876 slf.alloc,
2877 &mut slf.type_ctxt.type_env,
2878 id,
2879 ast,
2880 &slf.type_ctxt.term_env,
2881 &mut resolver,
2882 );
2883 //slf.asts.extend(resolver.new_asts.into_iter());
2884
2885 slf.type_ctxt
2886 .term_env
2887 .0
2888 .insert(id.ident(), (ast.clone(), slf.type_ctxt.term_env.clone()));
2889 Ok(())
2890 })?;
2891
2892 Ok(())
2893 }
2894
2895 /// Add the bindings of a record to the type environment. Ignore fields whose name are
2896 /// defined through interpolation.
2897 pub fn add_type_bindings(
2898 &mut self,
2899 pos_table: &PosTable,
2900 mut slice: CacheHubView<'_>,
2901 term: &NickelValue,
2902 ) -> Result<(), NotARecord> {
2903 self.with_mut(|slf| {
2904 // It's sad, but for now, we have to convert the term back to an AST to insert it in
2905 // the type environment.
2906 let ast = term.to_ast(slf.alloc, pos_table);
2907 let mut resolver = AstResolver::new(slf.alloc, slf.asts, slice.reborrow());
2908
2909 typecheck::env_add_term(
2910 slf.alloc,
2911 &mut slf.type_ctxt.type_env,
2912 ast,
2913 &slf.type_ctxt.term_env,
2914 &mut resolver,
2915 )
2916 .map_err(|_| NotARecord)
2917 })
2918 }
2919 }
2920}
2921
#[cfg(test)]
mod tests {
    use std::path::Path;

    use super::*;

    #[test]
    fn normalize_rel() {
        // Table-driven: each pair is (input, expected normalization).
        let cases = [("../a/../b", "../b"), ("../../a/../b", "../../b")];

        for (input, expected) in cases {
            assert_eq!(
                &normalize_rel_path(Path::new(input)),
                Path::new(expected)
            );
        }
    }

    #[test]
    fn get_cached_source_with_relative_path() {
        let mut sources = SourceCache::new();
        let root = PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("nickel-test-rootdir");
        let key = SourcePath::Path(root.join("file.ncl"), super::InputFormat::Nickel);
        let file_id = sources.replace_string(key, "1".into());

        // This path should not exist on the host but should match the in-memory file that
        // was set up in the cache once normalized.
        let lookup = sources
            .get_or_add_file(
                root.join("subdir").join("..").join("file.ncl"),
                InputFormat::Nickel,
            )
            .expect("Missed cached file when pulling with relative path");
        assert_eq!(CacheOp::Cached(file_id), lookup);
    }

    #[test]
    fn close_file() {
        let mut sources = SourceCache::new();
        let ncl_path = PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("closed.ncl");
        let key = SourcePath::Path(ncl_path.clone(), InputFormat::Nickel);

        sources.add_string(key.clone(), "1".to_string());
        sources
            .close_in_memory_file(ncl_path.clone(), InputFormat::Nickel)
            .unwrap();

        let kind = sources
            .file_ids
            .get(&key)
            .map(|entry| entry.source)
            .unwrap();
        assert_eq!(kind, SourceKind::MemoryClosed);

        // Since the closed file should be stale, id_or_new_timestamp_of should not return
        // the file ID for the closed file. Since in this case the file doesn't exist on the
        // filesystem either, it should return an error.
        assert!(
            sources
                .id_or_new_timestamp_of(&ncl_path, InputFormat::Nickel)
                .is_err()
        );
    }
}
2985}