nickel_lang_core/cache.rs
//! Various caches for artifacts generated across the whole pipeline: source code, parsed
//! representations, import data (dependencies, reverse dependencies, etc.).
//!
//! In order to manage the complexity of correctly borrowing such structures, where the arena
//! allocation of ASTs requires the use of self-borrowing structures, the main cache is split
//! into different subcaches that can be borrowed independently.
pub use ast_cache::AstCache;

use crate::{
    bytecode::ast::{
        self,
        compat::{ToAst, ToMainline},
        Ast, AstAlloc, TryConvert,
    },
    closurize::Closurize as _,
    error::{Error, ImportError, ParseError, ParseErrors, TypecheckError},
    eval::cache::Cache as EvalCache,
    eval::Closure,
    files::{FileId, Files},
    identifier::LocIdent,
    metrics::measure_runtime,
    package::PackageMap,
    parser::{lexer::Lexer, ErrorTolerantParser, ExtendedTerm},
    position::TermPos,
    program::FieldPath,
    stdlib::{self as nickel_stdlib, StdlibModule},
    term::{self, RichTerm, Term},
    transform::{import_resolution, Wildcards},
    traverse::{Traverse, TraverseOrder},
    typ::{self as mainline_typ, UnboundTypeVariableError},
    typecheck::{self, typecheck, HasApparentType, TypecheckMode},
    {eval, parser, transform},
};

#[cfg(feature = "nix-experimental")]
use crate::nix_ffi;

use std::{
    collections::{hash_map, HashMap, HashSet},
    ffi::{OsStr, OsString},
    fmt, fs,
    io::{self, Read},
    path::{Path, PathBuf},
    result::Result,
    sync::Arc,
    time::SystemTime,
};

use ouroboros::self_referencing;

/// Error when trying to add bindings to the typing context where the given term isn't a record
/// literal.
pub struct NotARecord;

/// Supported input formats.
#[derive(Default, Clone, Copy, Eq, Debug, PartialEq, Hash)]
pub enum InputFormat {
    #[default]
    Nickel,
    Json,
    Yaml,
    Toml,
    #[cfg(feature = "nix-experimental")]
    Nix,
    Text,
}

impl InputFormat {
    /// Returns an [InputFormat] based on the file extension of a path.
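    ///
    /// For instance (a minimal sketch; the paths are hypothetical):
    ///
    /// ```rust,ignore
    /// assert_eq!(InputFormat::from_path("config.ncl"), Some(InputFormat::Nickel));
    /// assert_eq!(InputFormat::from_path("values.yml"), Some(InputFormat::Yaml));
    /// // Unknown or missing extensions yield `None`.
    /// assert_eq!(InputFormat::from_path("README"), None);
    /// ```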
    pub fn from_path(path: impl AsRef<Path>) -> Option<InputFormat> {
        match path.as_ref().extension().and_then(OsStr::to_str) {
            Some("ncl") => Some(InputFormat::Nickel),
            Some("json") => Some(InputFormat::Json),
            Some("yaml") | Some("yml") => Some(InputFormat::Yaml),
            Some("toml") => Some(InputFormat::Toml),
            #[cfg(feature = "nix-experimental")]
            Some("nix") => Some(InputFormat::Nix),
            Some("txt") => Some(InputFormat::Text),
            _ => None,
        }
    }

    pub fn to_str(&self) -> &'static str {
        match self {
            InputFormat::Nickel => "Nickel",
            InputFormat::Json => "Json",
            InputFormat::Yaml => "Yaml",
            InputFormat::Toml => "Toml",
            InputFormat::Text => "Text",
            #[cfg(feature = "nix-experimental")]
            InputFormat::Nix => "Nix",
        }
    }

    /// Extracts the format embedded in a [SourcePath].
    pub fn from_source_path(source_path: &SourcePath) -> Option<InputFormat> {
        if let SourcePath::Path(_p, fmt) = source_path {
            Some(*fmt)
        } else {
            None
        }
    }
}

impl fmt::Display for InputFormat {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "{}", self.to_str())
    }
}

impl std::str::FromStr for InputFormat {
    type Err = ();

    fn from_str(s: &str) -> Result<Self, Self::Err> {
        Ok(match s {
            "Json" => InputFormat::Json,
            "Nickel" => InputFormat::Nickel,
            "Text" => InputFormat::Text,
            "Yaml" => InputFormat::Yaml,
            "Toml" => InputFormat::Toml,
            #[cfg(feature = "nix-experimental")]
            "Nix" => InputFormat::Nix,
            _ => return Err(()),
        })
    }
}

/// The term cache stores the parsed values (the runtime representation) of sources.
#[derive(Debug, Clone)]
pub struct TermCache {
    /// The term table stores parsed terms corresponding to the entries of the file database.
    terms: HashMap<FileId, TermEntry>,
}

#[derive(Debug, Clone, Copy, Eq, PartialEq)]
pub struct TermNotFound;

impl TermCache {
    pub fn new() -> Self {
        TermCache {
            terms: HashMap::new(),
        }
    }

    /// Updates the state of an entry and returns the previous state, or an error if the entry
    /// isn't in the cache.
    pub fn update_state(
        &mut self,
        file_id: FileId,
        new: TermEntryState,
    ) -> Result<TermEntryState, TermNotFound> {
        self.terms
            .get_mut(&file_id)
            .map(|TermEntry { state, .. }| std::mem::replace(state, new))
            .ok_or(TermNotFound)
    }

    /// Applies term transformations, except for import resolution, which is implemented in a
    /// separate phase.
    fn transform(
        &mut self,
        wildcards: &WildcardsCache,
        import_data: &ImportData,
        file_id: FileId,
    ) -> Result<CacheOp<()>, TermCacheError<UnboundTypeVariableError>> {
        match self.terms.get(&file_id).map(|entry| entry.state) {
            Some(state) if state >= TermEntryState::Transformed => Ok(CacheOp::Cached(())),
            Some(state) => {
                if state < TermEntryState::Transforming {
                    let cached_term = self.terms.remove(&file_id).unwrap();
                    let term =
                        transform::transform(cached_term.term, wildcards.wildcards.get(&file_id))?;
                    self.insert(
                        file_id,
                        TermEntry {
                            term,
                            state: TermEntryState::Transforming,
                            ..cached_term
                        },
                    );

                    let imported: Vec<_> = import_data.imports(file_id).collect();
                    for file_id in imported {
                        self.transform(wildcards, import_data, file_id)?;
                    }

                    // unwrap(): we re-inserted the entry after removal and transformation, so it
                    // should be in the cache.
                    let _ = self
                        .update_state(file_id, TermEntryState::Transformed)
                        .unwrap();
                }

                Ok(CacheOp::Done(()))
            }
            None => Err(CacheError::IncompatibleState {
                want: TermEntryState::Populated,
            }),
        }
    }

    /// Retrieves the state of an entry. Returns `None` if the entry is not in the term cache.
    /// This might happen if the file hasn't been parsed, or if the term cache hasn't been
    /// filled from the AST cache yet. The latter is supposed to happen right before program
    /// transformations.
    pub fn entry_state(&self, file_id: FileId) -> Option<TermEntryState> {
        self.terms
            .get(&file_id)
            .map(|TermEntry { state, .. }| *state)
    }

    /// Replaces a cache entry with a closurized version of itself. If it contains imports,
    /// closurizes them recursively.
    ///
    /// Closurization is not required before evaluation, but it has two benefits:
    ///
    /// - the closurized term uses the evaluation cache, so if it is imported in multiple
    ///   places then they will share a cache
    /// - the eval cache's built-in mechanism for preventing infinite recursion will also
    ///   apply to recursive imports.
    ///
    /// The main disadvantage of closurization is that it makes the resulting runtime
    /// representation less useful. You wouldn't want to closurize before pretty-printing, for
    /// example. This isn't as important these days, since we also have the AST representation
    /// at hand.
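    ///
    /// A usage sketch (assuming an eval cache such as
    /// `nickel_lang_core::eval::cache::CacheImpl`; variable names are hypothetical):
    ///
    /// ```rust,ignore
    /// let mut eval_cache = CacheImpl::new();
    /// // `terms` and `import_data` come from a populated `CacheHub`.
    /// terms.closurize(&mut eval_cache, &import_data, file_id)?;
    /// ```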
    pub fn closurize<C: EvalCache>(
        &mut self,
        cache: &mut C,
        import_data: &ImportData,
        file_id: FileId,
    ) -> Result<CacheOp<()>, TermCacheError<()>> {
        match self.entry_state(file_id) {
            Some(state) if state >= TermEntryState::Closurized => Ok(CacheOp::Cached(())),
            Some(_) => {
                let cached_term = self.terms.remove(&file_id).unwrap();
                let term = cached_term.term.closurize(cache, eval::Environment::new());
                self.insert(
                    file_id,
                    TermEntry {
                        term,
                        state: TermEntryState::Closurized,
                        ..cached_term
                    },
                );

                let imported: Vec<_> = import_data.imports(file_id).collect();
                for file_id in imported {
                    self.closurize(cache, import_data, file_id)?;
                }

                Ok(CacheOp::Done(()))
            }
            None => Err(CacheError::IncompatibleState {
                want: TermEntryState::Populated,
            }),
        }
    }

    /// Returns an immutable reference to the whole term cache.
    pub fn terms(&self) -> &HashMap<FileId, TermEntry> {
        &self.terms
    }

    /// Retrieves a fresh clone of a cached term.
    pub fn get_owned(&self, file_id: FileId) -> Option<RichTerm> {
        self.terms
            .get(&file_id)
            .map(|TermEntry { term, .. }| term.clone())
    }

    /// Retrieves a reference to a cached term.
    pub fn get(&self, file_id: FileId) -> Option<&RichTerm> {
        self.terms.get(&file_id).map(|TermEntry { term, .. }| term)
    }

    /// Retrieves the whole entry for a given file id.
    pub fn get_entry(&self, file_id: FileId) -> Option<&TermEntry> {
        self.terms.get(&file_id)
    }

    /// Returns `true` if the term cache contains a term for the given file id.
    pub fn contains(&self, file_id: FileId) -> bool {
        self.terms.contains_key(&file_id)
    }

    /// Inserts a new entry in the cache. Usually, this should be handled by [CacheHub]
    /// directly, but there are some use-cases where it is useful to pre-fill the term cache
    /// (typically in NLS).
    pub fn insert(&mut self, file_id: FileId, entry: TermEntry) {
        self.terms.insert(file_id, entry);
    }
}

/// This is a temporary fix for [#2362](https://github.com/tweag/nickel/issues/2362). File paths
/// prefixed with this are treated specially: they can refer to in-memory sources. To build an
/// import expression that refers to an in-memory source, append the source name to this prefix
/// and use it as the path: `format!("{IN_MEMORY_SOURCE_PATH_PREFIX}{src_name}")`.
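///
/// For example (a sketch; `my_snippet` is a hypothetical in-memory source name):
///
/// ```rust,ignore
/// // Builds a path that resolves to the in-memory source registered as "my_snippet".
/// let path = format!("{IN_MEMORY_SOURCE_PATH_PREFIX}my_snippet");
/// ```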
pub const IN_MEMORY_SOURCE_PATH_PREFIX: &str = "%inmem_src%:";

/// The source cache handles reading textual data from the file system or other sources and
/// storing it in a [Files] instance.
///
/// While not ideal, we have to make most of the fields public to allow the LSP to perform its
/// own import resolution.
#[derive(Clone)]
pub struct SourceCache {
    /// The content of the program sources plus imports.
    pub files: Files,
    /// Reverse map from file ids to source paths.
    pub file_paths: HashMap<FileId, SourcePath>,
    /// The name-id table, holding file ids stored in the database indexed by source names.
    pub file_ids: HashMap<SourcePath, NameIdEntry>,
    /// Paths to search for imports, as specified by the user through either the CLI argument
    /// `--import-path` or the environment variable `$NICKEL_IMPORT_PATH`.
    pub import_paths: Vec<PathBuf>,
    /// A table mapping FileIds to the package that they belong to.
    ///
    /// Path dependencies have already been canonicalized to absolute paths.
    pub packages: HashMap<FileId, PathBuf>,
    /// The map used to resolve package imports.
    pub package_map: Option<PackageMap>,
}

impl SourceCache {
    pub fn new() -> Self {
        SourceCache {
            files: Files::new(),
            file_paths: HashMap::new(),
            file_ids: HashMap::new(),
            import_paths: Vec::new(),
            packages: HashMap::new(),
            package_map: None,
        }
    }

    /// Retrieves the name of a source given an id.
    pub fn name(&self, file_id: FileId) -> &OsStr {
        self.files.name(file_id)
    }

    /// Adds paths to the import path list, where the resolver looks for imported files.
    pub fn add_import_paths<P>(&mut self, paths: impl Iterator<Item = P>)
    where
        PathBuf: From<P>,
    {
        self.import_paths.extend(paths.map(PathBuf::from));
    }

    /// Sets the package map to use for package import resolution.
    pub fn set_package_map(&mut self, map: PackageMap) {
        self.package_map = Some(map);
    }

    /// Same as [Self::add_file], but assumes that the path is already normalized and takes the
    /// timestamp as a parameter.
    fn add_normalized_file(
        &mut self,
        path: PathBuf,
        format: InputFormat,
        timestamp: SystemTime,
    ) -> io::Result<FileId> {
        let contents = std::fs::read_to_string(&path)?;
        let file_id = self.files.add(&path, contents);

        self.file_paths
            .insert(file_id, SourcePath::Path(path.clone(), format));
        self.file_ids.insert(
            SourcePath::Path(path, format),
            NameIdEntry {
                id: file_id,
                source: SourceKind::Filesystem(timestamp),
            },
        );
        Ok(file_id)
    }

    /// Loads a file and adds it to the name-id table.
    ///
    /// Uses the normalized path and the *modified at* timestamp as the name-id table entry.
    /// Overrides any existing entry with the same name.
    pub fn add_file(
        &mut self,
        path: impl Into<OsString>,
        format: InputFormat,
    ) -> io::Result<FileId> {
        let path = path.into();
        let timestamp = timestamp(&path)?;
        let normalized = normalize_path(&path)?;
        self.add_normalized_file(normalized, format, timestamp)
    }

    /// Tries to retrieve the id of a file from the cache.
    ///
    /// If it was not in the cache, tries to read it and add it as a new entry.
    ///
    /// # In-memory sources
    ///
    /// As a temporary fix for [#2362](https://github.com/tweag/nickel/issues/2362), if a file
    /// path starts with [IN_MEMORY_SOURCE_PATH_PREFIX], the suffix is first looked up as an
    /// un-normalized value, which makes it possible to hit in-memory-only sources by importing
    /// a path `"{IN_MEMORY_SOURCE_PATH_PREFIX}{src_name}"`. If it can't be found, it is looked
    /// up normally, so that this scheme doesn't break strange file names that happen to contain
    /// the source path prefix.
    ///
    /// It is theoretically possible that if both the source "abc" and the file
    /// "{IN_MEMORY_SOURCE_PATH_PREFIX}abc" exist, the source is imported instead of the
    /// intended file. However, given the prefix, such a clash just can't be accidental. As we
    /// want to give access to in-memory sources in any case, although this can be surprising, I
    /// don't see any obvious attack scenario here. This fix is also intended to be temporary.
    /// If you still need to make sure this doesn't happen, one way would be to add some
    /// randomness to the name of the sources, so that they can't be predicted beforehand.
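    ///
    /// A usage sketch (assuming `sources` is a `SourceCache`; the path is hypothetical):
    ///
    /// ```rust,ignore
    /// // Returns `CacheOp::Cached(id)` if `config.ncl` was already loaded and is up to date,
    /// // and `CacheOp::Done(id)` if it was (re)read from the filesystem.
    /// let op = sources.get_or_add_file("config.ncl", InputFormat::Nickel)?;
    /// let file_id = op.inner();
    /// ```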
    pub fn get_or_add_file(
        &mut self,
        path: impl Into<OsString>,
        format: InputFormat,
    ) -> io::Result<CacheOp<FileId>> {
        let path = path.into();
        let normalized = normalize_path(&path)?;

        // Try to fetch a generated source if the path starts with the hardcoded prefix.
        let generated_entry = path
            .to_str()
            .and_then(|p| p.strip_prefix(IN_MEMORY_SOURCE_PATH_PREFIX))
            .and_then(|src_name| {
                self.file_ids
                    .get(&SourcePath::Path(src_name.into(), format))
            });

        if let Some(entry) = generated_entry {
            return Ok(CacheOp::Cached(entry.id));
        }

        match self.id_or_new_timestamp_of(normalized.as_ref(), format)? {
            SourceState::UpToDate(id) => Ok(CacheOp::Cached(id)),
            SourceState::Stale(timestamp) => self
                .add_normalized_file(normalized, format, timestamp)
                .map(CacheOp::Done),
        }
    }

    /// Loads a source and adds it to the name-id table.
    ///
    /// Does not check if a source with the same name already exists: if it is the case,
    /// [Self::add_source] will happily override the old entry in the name-id table.
    pub fn add_source<T>(&mut self, source_name: SourcePath, mut source: T) -> io::Result<FileId>
    where
        T: Read,
    {
        let mut buffer = String::new();
        source.read_to_string(&mut buffer)?;
        Ok(self.add_string(source_name, buffer))
    }

    /// Returns the content of a file.
    ///
    /// Panics if the file id is invalid.
    pub fn source(&self, id: FileId) -> &str {
        self.files.source(id)
    }

    /// Returns a cloned `Arc` to the content of the file.
    ///
    /// The `Arc` is here for the LSP, where the background evaluation is handled by background
    /// threads and processes.
    ///
    /// Panics if the file id is invalid.
    pub fn clone_source(&self, id: FileId) -> Arc<str> {
        self.files.clone_source(id)
    }

    /// Loads a new source as a string and adds it to the name-id table.
    ///
    /// Does not check if a source with the same name already exists: if it is the case, this
    /// one will override the old entry in the name-id table but the old `FileId` will remain
    /// valid.
    pub fn add_string(&mut self, source_name: SourcePath, s: String) -> FileId {
        let id = self.files.add(source_name.clone(), s);

        self.file_paths.insert(id, source_name.clone());
        self.file_ids.insert(
            source_name,
            NameIdEntry {
                id,
                source: SourceKind::Memory,
            },
        );
        id
    }

    /// Loads a new source as a string, replacing any existing source with the same name.
    ///
    /// As opposed to [CacheHub::replace_string], this method doesn't update the other caches.
    /// It just affects the source cache.
    pub fn replace_string(&mut self, source_name: SourcePath, s: String) -> FileId {
        if let Some(file_id) = self.id_of(&source_name) {
            // The file may have been originally loaded from the filesystem and then
            // updated by the LSP, so the SourceKind needs to be updated to Memory.
            self.file_ids.insert(
                source_name,
                NameIdEntry {
                    id: file_id,
                    source: SourceKind::Memory,
                },
            );
            self.files.update(file_id, s);
            file_id
        } else {
            // We re-use [Self::add_string] here to properly fill the file_paths and file_ids
            // tables.
            self.add_string(source_name, s)
        }
    }

    /// Closes a file that has been opened in memory and reloads it from the filesystem.
    /// Returns the file ID of the replacement file loaded from the filesystem.
    pub fn close_in_memory_file(
        &mut self,
        path: PathBuf,
        format: InputFormat,
    ) -> Result<FileCloseResult, FileCloseError> {
        let entry = self
            .file_ids
            .get_mut(&SourcePath::Path(path.clone(), format))
            .ok_or(FileCloseError::FileIdNotFound)?;
        match &entry.source {
            SourceKind::Memory => {
                let closed_id = entry.id;
                entry.source = SourceKind::MemoryClosed;
                let replacement_id = self.get_or_add_file(path, format).map(|op| op.inner());
                Ok(FileCloseResult {
                    closed_id,
                    replacement_id,
                })
            }
            _ => Err(FileCloseError::FileNotOpen),
        }
    }

    /// Retrieves the id of a source given a name.
    ///
    /// Note that files added via [Self::add_file] are indexed by their full normalized path
    /// (cf. [normalize_path]).
    pub fn id_of(&self, name: &SourcePath) -> Option<FileId> {
        match name {
            SourcePath::Path(p, fmt) => match self.id_or_new_timestamp_of(p, *fmt).ok()? {
                SourceState::UpToDate(id) => Some(id),
                SourceState::Stale(_) => None,
            },
            name => Some(self.file_ids.get(name)?.id),
        }
    }

    /// Tries to retrieve the id of a cached source.
    ///
    /// Only returns `Ok` if the source is up-to-date; if the source is stale, returns
    /// either the new timestamp of the up-to-date file or the error we encountered when
    /// trying to read it (which most likely means there was no such file).
    ///
    /// The main point of this awkward signature is to minimize I/O operations: if we accessed
    /// the timestamp, keep it around.
    fn id_or_new_timestamp_of(&self, name: &Path, format: InputFormat) -> io::Result<SourceState> {
        match self
            .file_ids
            .get(&SourcePath::Path(name.to_owned(), format))
        {
            None
            | Some(NameIdEntry {
                source: SourceKind::MemoryClosed,
                ..
            }) => Ok(SourceState::Stale(timestamp(name)?)),
            Some(NameIdEntry {
                id,
                source: SourceKind::Filesystem(ts),
            }) => {
                let new_timestamp = timestamp(name)?;
                if ts == &new_timestamp {
                    Ok(SourceState::UpToDate(*id))
                } else {
                    Ok(SourceState::Stale(new_timestamp))
                }
            }
            Some(NameIdEntry {
                id,
                source: SourceKind::Memory,
            }) => Ok(SourceState::UpToDate(*id)),
        }
    }

    /// Gets a reference to the underlying files. Required by the WASM REPL error reporting code
    /// and LSP functions.
    pub fn files(&self) -> &Files {
        &self.files
    }

    /// Parses a Nickel source without querying or populating the other caches.
    pub fn parse_nickel<'ast>(
        &self,
        // We take the allocator explicitly, to make sure `self.asts` is properly initialized
        // before calling this function, and won't be dropped.
        alloc: &'ast AstAlloc,
        file_id: FileId,
    ) -> Result<Ast<'ast>, ParseErrors> {
        parse_nickel(alloc, file_id, self.files.source(file_id))
    }

    /// Parses a source that isn't Nickel code without querying or populating the other caches.
    /// Supports multiple formats.
    ///
    /// The Nickel/non-Nickel distinction is a bit artificial at the moment, due to the fact
    /// that parsing Nickel returns the new [crate::bytecode::ast::Ast], while parsing other
    /// formats doesn't go through the new AST first but directly deserializes to the legacy
    /// [crate::term::Term] for simplicity and performance reasons.
    ///
    /// Once RFC007 is fully implemented, we might clean this up.
    ///
    /// # Panics
    ///
    /// This function panics if `format` is [InputFormat::Nickel].
    pub fn parse_other(
        &self,
        file_id: FileId,
        format: InputFormat,
    ) -> Result<RichTerm, ParseError> {
        let attach_pos = |t: RichTerm| -> RichTerm {
            let pos: TermPos = self.files.source_span(file_id).into();
            t.with_pos(pos)
        };

        let source = self.files.source(file_id);

        match format {
            InputFormat::Nickel => {
                // Panicking isn't great, but we expect this to be temporary, until RFC007 is
                // fully implemented. And this case is an internal bug.
                panic!("error: trying to parse a Nickel source with parse_other")
            }
            InputFormat::Json => serde_json::from_str(source)
                .map(attach_pos)
                .map_err(|err| ParseError::from_serde_json(err, file_id, &self.files)),
            InputFormat::Yaml => crate::serialize::yaml::load_yaml_term(source, Some(file_id)),
            InputFormat::Toml => crate::serialize::toml_deser::from_str(source, file_id)
                .map(attach_pos)
                .map_err(|err| ParseError::from_toml(err, file_id)),
            #[cfg(feature = "nix-experimental")]
            InputFormat::Nix => {
                let json = nix_ffi::eval_to_json(source, &self.get_base_dir_for_nix(file_id))
                    .map_err(|e| ParseError::from_nix(e.what(), file_id))?;
                serde_json::from_str(&json)
                    .map(attach_pos)
                    .map_err(|err| ParseError::from_serde_json(err, file_id, &self.files))
            }
            InputFormat::Text => Ok(attach_pos(Term::Str(source.into()).into())),
        }
    }

    /// Returns true if a particular file id represents a Nickel standard library file, false
    /// otherwise.
    pub fn is_stdlib_module(&self, file: FileId) -> bool {
        self.files.is_stdlib(file)
    }

    /// Retrieves the file id for a given standard library module.
    pub fn get_submodule_file_id(&self, module: StdlibModule) -> Option<FileId> {
        self.stdlib_modules()
            .find(|(m, _id)| m == &module)
            .map(|(_, id)| id)
    }

    /// Returns the list of file ids corresponding to the standard library modules.
    pub fn stdlib_modules(&self) -> impl Iterator<Item = (StdlibModule, FileId)> {
        self.files.stdlib_modules()
    }

    /// Returns the format of a given source. Returns `None` if there is no entry in the source
    /// cache for `file_id`, or if there is no well-defined input format (e.g. for REPL inputs,
    /// field assignments, etc.).
    pub fn input_format(&self, file_id: FileId) -> Option<InputFormat> {
        self.file_paths
            .get(&file_id)
            .and_then(|source| match source {
                SourcePath::Path(_, input_format) => Some(*input_format),
                SourcePath::Std(_) => Some(InputFormat::Nickel),
                SourcePath::Snippet(_)
                | SourcePath::Query
                | SourcePath::ReplInput(_)
                | SourcePath::ReplTypecheck
                | SourcePath::ReplQuery
                | SourcePath::CliFieldAssignment
                | SourcePath::Override(_)
                | SourcePath::Generated(_) => None,
            })
    }

    /// Returns the base path for Nix evaluation, which is the parent directory of the source
    /// file if any, or the current working directory, or an empty path if we couldn't find
    /// anything better.
    #[cfg(feature = "nix-experimental")]
    fn get_base_dir_for_nix(&self, file_id: FileId) -> PathBuf {
        let parent_dir = self
            .file_paths
            .get(&file_id)
            .and_then(|source_path| Path::new(<&OsStr>::try_from(source_path).ok()?).parent());

        parent_dir
            .map(PathBuf::from)
            .or_else(|| std::env::current_dir().ok())
            .unwrap_or_default()
    }
}

/// Stores the mapping of each wildcard id to its inferred type, for each file in the cache.
#[derive(Default, Clone, Debug)]
pub struct WildcardsCache {
    wildcards: HashMap<FileId, Wildcards>,
}

impl WildcardsCache {
    pub fn new() -> Self {
        Self::default()
    }

    pub fn get(&self, file_id: FileId) -> Option<&Wildcards> {
        self.wildcards.get(&file_id)
    }
}

/// Metadata about an imported file.
#[derive(Clone, Copy, Debug, Hash, Eq, PartialEq)]
pub struct ImportTarget {
    pub file_id: FileId,
    pub format: InputFormat,
}

/// Stores dependency and reverse dependency data between sources.
#[derive(Default, Clone)]
pub struct ImportData {
    /// A map containing, for each FileId, the list of files it imports (directly).
    pub imports: HashMap<FileId, HashSet<ImportTarget>>,
    /// A map containing, for each FileId, the list of files importing it (directly). Note that
    /// we don't need to store the format here, as only Nickel files can import other files. We
    /// do however store the position of the first import expression (the same file can be
    /// imported many times from a given file), for error reporting purposes.
    pub rev_imports: HashMap<FileId, HashMap<FileId, TermPos>>,
}

impl ImportData {
    pub fn new() -> Self {
        Self::default()
    }

    /// Returns the set of files that this file imports.
    pub fn imports(&self, file: FileId) -> impl Iterator<Item = FileId> + '_ {
        self.imports
            .get(&file)
            .into_iter()
            .flat_map(|s| s.iter())
            .map(|tgt| tgt.file_id)
    }

    /// Returns the set of files that import this file.
    pub fn rev_imports(&self, file: FileId) -> impl Iterator<Item = FileId> + '_ {
        self.rev_imports
            .get(&file)
            .into_iter()
            .flat_map(|h| h.keys())
            .copied()
    }

    /// Returns the set of files that transitively depend on this file.
    pub fn transitive_rev_imports(&self, file: FileId) -> HashSet<FileId> {
        let mut ret = HashSet::new();
        let mut stack = vec![file];

        while let Some(file) = stack.pop() {
            for f in self.rev_imports(file) {
                if ret.insert(f) {
                    stack.push(f);
                }
            }
        }

        ret
    }

    /// Returns the set of files that this file transitively depends on.
    pub fn transitive_imports(&self, file: FileId) -> HashSet<FileId> {
        let mut ret = HashSet::new();
        let mut stack = vec![file];

        while let Some(file) = stack.pop() {
            for f in self.imports(file) {
                if ret.insert(f) {
                    stack.push(f);
                }
            }
        }

        ret
    }

    /// Returns `true` if these import data are empty.
    pub fn is_empty(&self) -> bool {
        self.imports.is_empty() && self.rev_imports.is_empty()
    }
}

/// The cache hub aggregates the various kinds of source-related caches used by Nickel.
///
/// [CacheHub] handles parsing, typechecking and program transformation of sources, as well as
/// caching the corresponding artifacts (text, ASTs, state). This is the central entry point for
/// other modules.
///
/// # RFC007
///
/// As part of the migration to a new AST required by RFC007, as long as we don't have a fully
/// working bytecode virtual machine, the cache needs to keep parsed expressions both in the old
/// representation (dubbed "mainline" or the runtime representation in many places) and in the
/// new AST representation.
pub struct CacheHub {
    pub terms: TermCache,
    pub sources: SourceCache,
    pub asts: AstCache,
    pub wildcards: WildcardsCache,
    pub import_data: ImportData,
    #[cfg(debug_assertions)]
    /// Skip loading the stdlib, used for debugging purposes
    pub skip_stdlib: bool,
}

impl CacheHub {
    pub fn new() -> Self {
        CacheHub {
            terms: TermCache::new(),
            sources: SourceCache::new(),
            asts: AstCache::empty(),
            wildcards: WildcardsCache::new(),
            import_data: ImportData::new(),
            #[cfg(debug_assertions)]
            skip_stdlib: false,
        }
    }

    /// Actual implementation of [Self::parse_to_ast] which doesn't take `self` as a parameter,
    /// so that it can be reused from other places when we don't have a full [CacheHub] instance
    /// at hand.
    fn parse_ast_impl(
        asts: &mut AstCache,
        sources: &mut SourceCache,
        file_id: FileId,
    ) -> Result<CacheOp<()>, ParseErrors> {
        if asts.contains(file_id) {
            Ok(CacheOp::Cached(()))
        } else {
            let _ = asts.parse_nickel(file_id, sources.files.source(file_id))?;
            Ok(CacheOp::Done(()))
        }
    }

    /// Parses a REPL input and populates the corresponding entry in the cache.
    ///
    /// The inner value in the `Ok` case is the identifier of the toplevel let, if the input is
    /// a toplevel let, or `None` if the input is a standard Nickel expression.
    ///
    /// # RFC007
    ///
    /// This method populates both the AST cache and the term cache at once.
    pub fn parse_repl(
        &mut self,
        file_id: FileId,
    ) -> Result<CacheOp<Option<LocIdent>>, ParseErrors> {
        // Since we need the identifier, we always reparse the input. In any case, we never
        // parse the same REPL input twice right now, so caching it is in fact useless. It's
        // just much simpler to reuse the cache infrastructure than to reimplement the whole
        // transformations and import dependency tracking elsewhere.
        let extd_ast = self
            .asts
            .parse_nickel_repl(file_id, self.sources.files.source(file_id))?;

        let (id, ast) = match extd_ast {
            ExtendedTerm::Term(t) => (None, t),
            ExtendedTerm::ToplevelLet(id, t) => (Some(id), t),
        };

        let term = measure_runtime!("runtime:ast_conversion", ast.to_mainline());

        self.terms.insert(
            file_id,
            TermEntry {
                term,
                state: TermEntryState::default(),
                format: InputFormat::Nickel,
            },
        );

        Ok(CacheOp::Done(id))
    }

    /// Parses a source and populates the corresponding entry in the AST cache, or does nothing
    /// if the entry has already been parsed. External input formats are currently parsed
    /// directly to the runtime representation, without going through an AST: the format is
    /// assumed to be [InputFormat::Nickel] in this method. See [Self::parse_to_term] for other
    /// formats.
    ///
    /// # RFC007
    ///
    /// This method only populates the AST cache. The term cache must be filled separately.
    pub fn parse_to_ast(&mut self, file_id: FileId) -> Result<CacheOp<()>, ParseErrors> {
        Self::parse_ast_impl(&mut self.asts, &mut self.sources, file_id)
    }

    /// Parses a source or compiles an AST into the term cache:
    ///
    /// - if the entry is already in the term cache, does nothing.
    /// - if the format is Nickel and there is a corresponding entry in the AST cache, converts
    ///   the parsed AST to a [RichTerm] and puts it in the term cache.
    /// - if the format is Nickel but there is no cached AST, or if the format is not Nickel,
    ///   parses the input directly into the term cache.
    ///
    /// Mostly used during ([RichTerm]-based) import resolution.
    pub fn parse_to_term(
        &mut self,
        file_id: FileId,
        format: InputFormat,
    ) -> Result<CacheOp<()>, ParseErrors> {
        if self.terms.contains(file_id) {
            return Ok(CacheOp::Cached(()));
        }

        let term = if let InputFormat::Nickel = format {
            match self.compile(file_id) {
                Ok(cache_op) => return Ok(cache_op),
                Err(_) => {
                    let alloc = AstAlloc::new();
                    self.sources.parse_nickel(&alloc, file_id)?.to_mainline()
                }
            }
        } else {
            self.sources.parse_other(file_id, format)?
        };

        self.terms.insert(
            file_id,
            TermEntry {
                term,
                state: TermEntryState::default(),
                format,
            },
        );

        Ok(CacheOp::Done(()))
    }

    /// Typechecks an entry of the cache and updates its state accordingly, or does nothing if
    /// the entry has already been typechecked. Requires that the corresponding source has been
    /// parsed. If the source contains imports, [Self::typecheck] recursively typechecks the
    /// imports as well.
    ///
    /// # RFC007
    ///
    /// During the transition period between the old VM and the new bytecode VM, this method
    /// performs typechecking on the new representation [crate::bytecode::ast::Ast].
    pub fn typecheck(
        &mut self,
        file_id: FileId,
        initial_mode: TypecheckMode,
    ) -> Result<CacheOp<()>, AstCacheError<TypecheckError>> {
        let (slice, asts) = self.split_asts();
        asts.typecheck(slice, file_id, initial_mode)
    }

    /// Returns the apparent type of an entry that has been typechecked, with wildcards
    /// substituted.
    pub fn type_of(
        &mut self,
        file_id: FileId,
    ) -> Result<CacheOp<mainline_typ::Type>, AstCacheError<TypecheckError>> {
        let (slice, asts) = self.split_asts();
        asts.type_of(slice, file_id)
    }

    /// Prepares a source for evaluation: parses, typechecks and applies program
    /// transformations, if it was not already done.
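    ///
    /// A typical end-to-end sequence looks roughly like this (a sketch; `main.ncl` is a
    /// hypothetical input file):
    ///
    /// ```rust,ignore
    /// let mut caches = CacheHub::new();
    /// caches.prepare_stdlib()?;
    /// let file_id = caches.sources.add_file("main.ncl", InputFormat::Nickel)?;
    /// // Parses, typechecks and transforms the file and all of its imports.
    /// caches.prepare(file_id)?;
    /// ```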
    pub fn prepare(&mut self, file_id: FileId) -> Result<CacheOp<()>, Error> {
        self.prepare_impl(file_id, true)
    }

    /// Prepares a file for evaluation only. Same as [Self::prepare], but doesn't typecheck the
    /// source.
    pub fn prepare_eval_only(&mut self, file_id: FileId) -> Result<CacheOp<()>, Error> {
        self.prepare_impl(file_id, false)
    }

    /// Common implementation for [Self::prepare] and [Self::prepare_eval_only], which
    /// optionally skips typechecking.
    fn prepare_impl(&mut self, file_id: FileId, typecheck: bool) -> Result<CacheOp<()>, Error> {
        let mut result = CacheOp::Cached(());

        let format = self
            .sources
            .file_paths
            .get(&file_id)
            .and_then(InputFormat::from_source_path)
            .unwrap_or_default();

        if let InputFormat::Nickel = format {
            if let CacheOp::Done(_) = self.parse_to_ast(file_id)? {
                result = CacheOp::Done(());
            }

            if typecheck {
                let (slice, asts) = self.split_asts();

                let typecheck_res = asts
                    .typecheck(slice, file_id, TypecheckMode::Walk)
                    .map_err(|cache_err| {
                        cache_err.unwrap_error(
                            "cache::prepare(): expected source to be parsed before typechecking",
                        )
                    })?;

                if typecheck_res == CacheOp::Done(()) {
                    result = CacheOp::Done(());
                };
            }
        }
        // Non-Nickel terms are currently not parsed as ASTs, but directly as the runtime
        // representation. While the imports of the main file will be parsed to terms by
        // `compile_and_transform` automatically, we do need to ensure that the main file is in
        // the term cache if it's an external format, or `compile_and_transform` will complain.
        else if let CacheOp::Done(_) = self.parse_to_term(file_id, format)? {
            result = CacheOp::Done(());
        }

        let transform_res = self.compile_and_transform(file_id).map_err(|cache_err| {
            cache_err.unwrap_error(
                "cache::prepare(): expected source to be parsed before transformations",
            )
        })?;

        if transform_res == CacheOp::Done(()) {
            result = CacheOp::Done(());
        };

        Ok(result)
    }

    /// Prepares a REPL snippet for evaluation: parses, typechecks and applies program
    /// transformations, if it was not already done. The difference with [Self::prepare] is
    /// that this method also accepts toplevel bindings `let <id> = <value>`.
    ///
    /// Returns the identifier of the toplevel let, if the input is a toplevel let, or `None`
    /// if the input is a standard Nickel expression.
    pub fn prepare_repl(&mut self, file_id: FileId) -> Result<CacheOp<Option<LocIdent>>, Error> {
        let mut done = false;

        let parsed = self.parse_repl(file_id)?;

        done = done || matches!(parsed, CacheOp::Done(_));

        let id = parsed.inner();

        let (slice, asts) = self.split_asts();
        let typecheck_res = asts
            .typecheck(slice, file_id, TypecheckMode::Walk)
            .map_err(|cache_err| {
                cache_err.unwrap_error(
                    "cache::prepare_repl(): expected source to be parsed before typechecking",
                )
            })?;

        if let Some(id) = id {
            let (slice, asts) = self.split_asts();
            asts.add_type_binding(slice, id, file_id).expect(
                "cache::prepare_repl(): expected source to be parsed before augmenting the \
                 type environment",
            );
        }

        done = done || matches!(typecheck_res, CacheOp::Done(_));

        let transform_res = self.compile_and_transform(file_id).map_err(|cache_err| {
            cache_err.unwrap_error(
                "cache::prepare(): expected source to be parsed before transformations",
            )
        })?;

        done = done || matches!(transform_res, CacheOp::Done(_));

        if done {
            Ok(CacheOp::Done(id))
        } else {
            Ok(CacheOp::Cached(id))
        }
    }

    /// Proxy for [TermCache::transform].
    fn transform(
        &mut self,
        file_id: FileId,
    ) -> Result<CacheOp<()>, TermCacheError<UnboundTypeVariableError>> {
        self.terms
            .transform(&self.wildcards, &self.import_data, file_id)
    }

    /// Loads and parses the standard library in the AST cache.
    ///
    /// # RFC007
    ///
    /// This method doesn't populate the term cache. Use [Self::compile_stdlib] afterwards.
    pub fn load_stdlib(&mut self) -> Result<CacheOp<()>, Error> {
        let mut ret = CacheOp::Cached(());

        for (_, file_id) in self.sources.stdlib_modules() {
            if let CacheOp::Done(_) = self.parse_to_ast(file_id)? {
                ret = CacheOp::Done(());
            }
        }

        Ok(ret)
    }

    /// Converts the parsed standard library to the runtime representation.
    pub fn compile_stdlib(&mut self) -> Result<CacheOp<()>, AstCacheError<()>> {
        let mut ret = CacheOp::Cached(());

        for (_, file_id) in self.sources.stdlib_modules() {
            let result = self.compile(file_id).map_err(|cache_err| {
                if let CacheError::IncompatibleState { want } = cache_err {
                    CacheError::IncompatibleState { want }
                } else {
                    unreachable!("unexpected parse error during the compilation of stdlib")
                }
            })?;

            if let CacheOp::Done(_) = result {
                ret = CacheOp::Done(());
            }
        }

        Ok(ret)
    }

    /// Typechecks the standard library. Currently only used in the test suite.
    pub fn typecheck_stdlib(&mut self) -> Result<CacheOp<()>, AstCacheError<TypecheckError>> {
        let (slice, asts) = self.split_asts();
        asts.typecheck_stdlib(slice)
    }

    /// Loads, parses, and compiles the standard library. We don't typecheck it for performance
    /// reasons: this is done in the test suite.
    pub fn prepare_stdlib(&mut self) -> Result<(), Error> {
        #[cfg(debug_assertions)]
        if self.skip_stdlib {
            return Ok(());
        }

        self.load_stdlib()?;
        // unwrap(): we just loaded the stdlib, so it must be parsed in the cache.
        self.compile_stdlib().unwrap();

        self.sources
            .stdlib_modules()
            // We need to handle the internals module separately. Each field
            // is bound directly in the environment without evaluating it first, so we can't
            // tolerate top-level let bindings that would be introduced by `transform`.
            .try_for_each(|(_, file_id)| self.transform(file_id).map(|_| ()))
            .map_err(|cache_err: TermCacheError<UnboundTypeVariableError>| {
                Error::ParseErrors(
                    cache_err
                        .unwrap_error(
                            "cache::prepare_stdlib(): unexpected unbound type variable error during stdlib loading",
                        )
                        .into(),
                )
            })?;

        Ok(())
    }

    /// Applies a custom transform to an input and its imports. [CacheError::IncompatibleState]
    /// is returned if the file has not yet been typechecked.
    ///
    /// If multiple invocations of `custom_transform` are needed, you must supply a
    /// `transform_id` higher than that of all previous invocations.
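    ///
    /// For instance (a sketch; the closure below is a hypothetical no-op transform):
    ///
    /// ```rust,ignore
    /// // First custom pass over `file_id` and its imports.
    /// caches.custom_transform(file_id, 0, &mut |_caches, term| Ok::<_, ()>(term))?;
    /// // A later, distinct pass must use a strictly higher transform id.
    /// caches.custom_transform(file_id, 1, &mut |_caches, term| Ok::<_, ()>(term))?;
    /// ```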
    pub fn custom_transform<E>(
        &mut self,
        file_id: FileId,
        transform_id: usize,
        f: &mut impl FnMut(&mut CacheHub, RichTerm) -> Result<RichTerm, E>,
    ) -> Result<(), TermCacheError<E>> {
        match self.terms.entry_state(file_id) {
            None => Err(CacheError::IncompatibleState {
                want: TermEntryState::Populated,
            }),
            Some(state) => {
                if state.needs_custom_transform(transform_id) {
                    let cached_term = self.terms.terms.remove(&file_id).unwrap();
                    let term = f(self, cached_term.term)?;
                    self.terms.insert(
                        file_id,
                        TermEntry {
                            term,
                            state: TermEntryState::CustomTransforming,
                            ..cached_term
                        },
                    );

                    let imported: Vec<_> = self.import_data.imports(file_id).collect();
                    for file_id in imported {
                        self.custom_transform(file_id, transform_id, f)?;
                    }

                    // TODO: We're setting the state back to whatever it was.
                    // unwrap(): we inserted the term just above
                    let _ = self
                        .terms
                        .update_state(file_id, TermEntryState::CustomTransformed { transform_id })
                        .unwrap();
                }

                Ok(())
            }
        }
    }

    /// Resolves every import of a term entry of the cache and updates its state accordingly,
    /// or does nothing if the imports of the entry have already been resolved or if they
    /// aren't Nickel inputs. Requires that the corresponding source has been parsed.
    ///
    /// If resolved imports contain imports themselves, they are resolved recursively. Returns
    /// the imports that were transitively resolved along the way. Imports that were already
    /// resolved before are not included: this return value is currently used by the LSP to
    /// re-run code analysis on new or modified files.
    ///
    /// The resolved imports are ordered by a pre-order depth-first search. In particular,
    /// earlier elements in the returned list might import later elements but -- unless there
    /// are cyclic imports -- later elements do not import earlier elements.
    ///
    /// This method performs strict import resolution: it returns an `Err(..)` containing the
    /// first import error it encounters instead of accumulating errors.
    ///
    /// # RFC007
    ///
    /// This method is still needed only because the evaluator can't handle unresolved imports,
    /// so we need to replace them with resolved imports. However, actual import resolution
    /// (loading and parsing files for the first time) is now driven by typechecking directly.
    pub fn resolve_imports(
        &mut self,
        file_id: FileId,
    ) -> Result<CacheOp<Vec<FileId>>, TermCacheError<ImportError>> {
        let entry = self.terms.terms.get(&file_id);

        match entry {
            Some(TermEntry {
                state,
                term,
                format: InputFormat::Nickel,
            }) if *state < TermEntryState::ImportsResolving => {
                let term = term.clone();

                let import_resolution::strict::ResolveResult {
                    transformed_term,
                    resolved_ids: pending,
                } = import_resolution::strict::resolve_imports(term, self)?;

                // unwrap(): we matched on a `Some(..)` result of `self.terms.get(&file_id)` at
                // the beginning of the enclosing branch. We only made recursive calls to
                // `resolve_imports` in between, which don't remove anything from `self.terms`.
                let cached_term = self.terms.terms.get_mut(&file_id).unwrap();
                cached_term.term = transformed_term;
                cached_term.state = TermEntryState::ImportsResolving;

                let mut done = Vec::new();

                // Transitively resolve the imports, and accumulate the ids of the resolved
                // files along the way.
                for id in pending {
                    if let CacheOp::Done(mut done_local) = self.resolve_imports(id)? {
                        done.push(id);
                        done.append(&mut done_local)
                    }
                }

                // unwrap(): if we are in this branch, the term is present in the cache
                let _ = self
                    .terms
                    .update_state(file_id, TermEntryState::ImportsResolved)
                    .unwrap();

                Ok(CacheOp::Done(done))
            }
            // There's no import to resolve for non-Nickel inputs. We still update the state.
            Some(TermEntry { state, .. }) if *state < TermEntryState::ImportsResolving => {
                // unwrap(): if we are in this branch, the term is present in the cache
                let _ = self
                    .terms
                    .update_state(file_id, TermEntryState::ImportsResolved)
                    .unwrap();
                Ok(CacheOp::Cached(Vec::new()))
            }
            // [^transitory_entry_state]
            //
            // This case is triggered by a cyclic import. The entry is already
            // being treated by an ongoing call to `resolve_imports` higher up in
            // the call chain, so we don't do anything here.
            //
            // Note that in some cases, this intermediate state can be observed by an
            // external caller: if a first call to `resolve_imports` fails in the middle of
            // resolving the transitive imports, the end state of the entry is
            // `ImportsResolving`. Subsequent calls to `resolve_imports` will succeed, but
            // won't change the state to `EntryState::ImportsResolved` (and for a good
            // reason: we wouldn't even know what the pending imports to resolve are). The
            // Nickel pipeline should however fail if `resolve_imports` failed at some
            // point, anyway.
            Some(TermEntry {
                state: TermEntryState::ImportsResolving,
                ..
            }) => Ok(CacheOp::Done(Vec::new())),
            // >= EntryState::ImportsResolved
            Some(_) => Ok(CacheOp::Cached(Vec::new())),
            None => Err(CacheError::IncompatibleState {
                want: TermEntryState::Populated,
            }),
        }
    }

    /// Generates the initial evaluation environment from the list of file ids corresponding to
    /// the standard library modules.
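    ///
    /// A usage sketch (assuming an eval cache such as
    /// `nickel_lang_core::eval::cache::CacheImpl`, and that the stdlib has been prepared
    /// beforehand):
    ///
    /// ```rust,ignore
    /// caches.prepare_stdlib()?;
    /// let mut eval_cache = CacheImpl::new();
    /// let initial_env = caches.mk_eval_env(&mut eval_cache);
    /// ```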
    pub fn mk_eval_env<EC: EvalCache>(&self, eval_cache: &mut EC) -> eval::Environment {
        let mut eval_env = eval::Environment::new();

        for (module, file_id) in self.sources.stdlib_modules() {
            // The internals module needs special treatment: it's required to be a record
            // literal, and its bindings are added directly to the environment
            if let nickel_stdlib::StdlibModule::Internals = module {
                let result = eval::env_add_record(
                    eval_cache,
                    &mut eval_env,
                    Closure::atomic_closure(self.terms.get_owned(file_id).expect(
                        "cache::mk_eval_env(): can't build environment, stdlib not parsed",
                    )),
                );
                if let Err(eval::EnvBuildError::NotARecord(rt)) = result {
                    panic!(
                        "cache::Caches::mk_eval_env(): \
                        expected the stdlib module {} to be a record, got {:?}",
                        self.sources.name(file_id).to_string_lossy().as_ref(),
                        rt
                    )
                }
            } else {
                eval::env_add(
                    eval_cache,
                    &mut eval_env,
                    module.name().into(),
                    self.terms.get_owned(file_id).expect(
                        "cache::Caches::mk_eval_env(): can't build environment, stdlib not parsed",
                    ),
                    eval::Environment::new(),
                );
            }
        }

        eval_env
    }

    /// Loads a new source as a string, replacing any existing source with the same name.
    ///
    /// If there was a previous source with the same name, its `FileId` is reused and the cached
    /// term is deleted.
    ///
    /// Used to store intermediate, short-lived generated snippets that need to have a
    /// corresponding `FileId`, such as when querying or reporting errors.
    pub fn replace_string(&mut self, source_name: SourcePath, s: String) -> FileId {
        if let Some(file_id) = self.sources.id_of(&source_name) {
            self.sources.files.update(file_id, s);
            self.asts.remove(file_id);
            self.terms.terms.remove(&file_id);
            file_id
        } else {
            let file_id = self.sources.files.add(source_name.clone(), s);
            self.sources
                .file_paths
                .insert(file_id, source_name.clone());
            self.sources.file_ids.insert(
                source_name,
                NameIdEntry {
                    id: file_id,
                    source: SourceKind::Memory,
                },
            );
            file_id
        }
    }

    pub fn closurize<EC: EvalCache>(
        &mut self,
        eval_cache: &mut EC,
        file_id: FileId,
    ) -> Result<CacheOp<()>, TermCacheError<()>> {
        self.terms
            .closurize(eval_cache, &self.import_data, file_id)
    }

    /// Adds the bindings of a record to the REPL type environment. Ignores fields whose names
    /// are defined through interpolation.
    pub fn add_repl_bindings(&mut self, term: &RichTerm) -> Result<(), NotARecord> {
        let (slice, asts) = self.split_asts();
        asts.add_type_bindings(slice, term)
    }

    /// Converts an AST and all of its transitive dependencies to the runtime representation,
    /// populating the term cache. `main_id` and any of its Nickel dependencies must be present
    /// in the AST cache, or [CacheError::IncompatibleState] is returned. Non-Nickel
    /// dependencies, however, are parsed directly into the term cache instead.
    ///
    /// "Compile" is anticipating a bit on RFC007, although this is indeed a lowering of the
    /// AST representation to the runtime representation.
    ///
    /// Compilation doesn't have a proper state associated with it, and thus should always be
    /// coupled with program transformations through [Self::compile_and_transform]. It should
    /// preferably not be observable as an atomic transition, although as far as I can tell,
    /// doing so shouldn't cause major trouble.
    pub fn compile(&mut self, main_id: FileId) -> Result<CacheOp<()>, AstCacheError<ImportError>> {
        if self.terms.contains(main_id) {
            return Ok(CacheOp::Cached(()));
        }

        // We set the format of the main `file_id` to `Nickel`, even if it is not, to require
        // its presence in either the term cache or the ast cache.
        let mut work_stack = vec![ImportTarget {
            file_id: main_id,
            format: InputFormat::default(),
        }];

        while let Some(ImportTarget { file_id, format }) = work_stack.pop() {
            if self.terms.contains(file_id) {
                continue;
            }

            let entry = if let InputFormat::Nickel = format {
                let ast_entry =
                    self.asts
                        .get_entry(file_id)
                        .ok_or(CacheError::IncompatibleState {
                            want: AstEntryState::Parsed,
                        })?;

                TermEntry {
                    term: ast_entry.ast.to_mainline(),
                    format: ast_entry.format,
                    state: TermEntryState::default(),
                }
            } else {
                // We want to maintain the same error message as before the introduction of the
                // two distinct representations and their processing in two stages (first Nickel
                // files that have an AST, and then others before evaluation).
                //
                // If we find a non-Nickel file here that needs to be parsed, it's because it's
                // been imported from somewhere else. The error used to be an import error,
                // which includes the location of the importing expression. We thus raise an
                // import error here, in case of failure.
                let term = self
                    .sources
                    .parse_other(file_id, format)
                    .map_err(|parse_err| {
                        CacheError::Error(ImportError::ParseErrors(
                            parse_err.into(),
                            self.import_data
                                .rev_imports
                                .get(&file_id)
                                .and_then(|map| map.get(&main_id))
                                .copied()
                                .unwrap_or_default(),
                        ))
                    })?;

                TermEntry {
                    term,
                    format,
                    state: TermEntryState::default(),
                }
            };

            self.terms.insert(file_id, entry);

            work_stack.extend(
                self.import_data
                    .imports
                    .get(&file_id)
                    .into_iter()
                    .flat_map(|set| set.iter()),
            )
        }

        Ok(CacheOp::Done(()))
    }

    /// Converts an AST entry and all of its transitive dependencies to the runtime
    /// representation (compile), populating the term cache. Applies both import resolution and
    /// other program transformations on the resulting terms.
    pub fn compile_and_transform(
        &mut self,
        file_id: FileId,
    ) -> Result<CacheOp<()>, AstCacheError<Error>> {
        let mut done = false;

        done = matches!(
            self.compile(file_id)
                .map_err(|cache_err| cache_err.map_err(Error::ImportError))?,
            CacheOp::Done(_)
        ) || done;

        let imports = self
            .resolve_imports(file_id)
            // force_cast(): since we compiled `file_id`, the term cache must be populated, and
            // thus `resolve_imports` should never throw `CacheError::IncompatibleState`.
            .map_err(|cache_err| cache_err.map_err(Error::ImportError).force_cast())?;
        done = matches!(imports, CacheOp::Done(_)) || done;

        let transform = self
            .terms
            .transform(&self.wildcards, &self.import_data, file_id)
            // force_cast(): since we compiled `file_id`, the term cache must be populated, and
            // thus `transform` should never throw `CacheError::IncompatibleState`.
            .map_err(|cache_err| {
                cache_err
                    .map_err(|uvar_err| Error::ParseErrors(ParseErrors::from(uvar_err)))
                    .force_cast()
            })?;
        done = matches!(transform, CacheOp::Done(_)) || done;

        Ok(if done {
            CacheOp::Done(())
        } else {
            CacheOp::Cached(())
        })
    }

    /// Creates a partial copy of this cache for evaluation purposes only. In particular, we
    /// don't copy anything related to arena-allocated ASTs. However, source files, import data
    /// and terms are copied over, which is useful to make new evaluation caches cheaply,
    /// typically for NLS and benches.
    pub fn clone_for_eval(&self) -> Self {
        Self {
            terms: self.terms.clone(),
            sources: self.sources.clone(),
            asts: AstCache::empty(),
            wildcards: self.wildcards.clone(),
            import_data: self.import_data.clone(),
            #[cfg(debug_assertions)]
            skip_stdlib: self.skip_stdlib,
        }
    }

    /// Splits a mutable borrow of self into a mutable borrow of the AST cache and a mutable
    /// borrow of the rest.
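    ///
    /// This is the pattern used throughout this module, e.g. for typechecking (a sketch):
    ///
    /// ```rust,ignore
    /// let (slice, asts) = caches.split_asts();
    /// // `asts` is borrowed mutably while the rest of the cache travels alongside in `slice`.
    /// asts.typecheck(slice, file_id, TypecheckMode::Walk)?;
    /// ```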
1551 pub fn split_asts(&mut self) -> (CacheHubView<'_>, &mut AstCache) {
1552 (
1553 CacheHubView {
1554 terms: &mut self.terms,
1555 sources: &mut self.sources,
1556 wildcards: &mut self.wildcards,
1557 import_data: &mut self.import_data,
1558 #[cfg(debug_assertions)]
1559 skip_stdlib: self.skip_stdlib,
1560 },
1561 &mut self.asts,
1562 )
1563 }
1564
1565 /// See [SourceCache::input_format].
1566 pub fn input_format(&self, file_id: FileId) -> Option<InputFormat> {
1567 self.sources.input_format(file_id)
1568 }
1569}
1570
1571/// Because ASTs are arena-allocated, the self-referential [ast_cache::AstCache] which holds both
1572/// the arena and references to this arena often needs special treatment, if we want to make the
1573/// borrow checker happy. The following structure is basically a view of "everything but the ast
1574/// cache" into [CacheHub], so that we can separate and pack all the rest in a single structure,
1575/// making the signature of many [ast_cache::AstCache] methods much lighter.
1576pub struct CacheHubView<'cache> {
1577 terms: &'cache mut TermCache,
1578 sources: &'cache mut SourceCache,
1579 wildcards: &'cache mut WildcardsCache,
1580 import_data: &'cache mut ImportData,
1581 #[cfg(debug_assertions)]
1582 /// Skip loading the stdlib, used for debugging purpose
1583 skip_stdlib: bool,
1584}
1585
1586impl CacheHubView<'_> {
1587 /// Make a reborrow of this slice.
1588 pub fn reborrow(&mut self) -> CacheHubView<'_> {
1589 CacheHubView {
1590 terms: self.terms,
1591 sources: self.sources,
1592 wildcards: self.wildcards,
1593 import_data: self.import_data,
1594 #[cfg(debug_assertions)]
1595 skip_stdlib: self.skip_stdlib,
1596 }
1597 }
1598}
1599
1600/// An entry in the term cache. Stores the parsed term together with metadata and state.
1601#[derive(Debug, Clone, PartialEq)]
1602pub struct TermEntry {
1603 pub term: RichTerm,
1604 pub state: TermEntryState,
1605 pub format: InputFormat,
1606}
1607
1608/// An entry in the AST cache. Stores the parsed term together with metadata and state.
1609#[derive(Debug, Clone, PartialEq)]
1610pub struct AstEntry<'ast> {
1611 pub ast: &'ast Ast<'ast>,
1612 pub state: AstEntryState,
1613 pub format: InputFormat,
1614}
1615
1616impl<'ast> AstEntry<'ast> {
1617 /// Creates a new entry with default metadata.
1618 pub fn new(ast: &'ast Ast<'ast>) -> Self {
1619 AstEntry {
1620 ast,
1621 state: AstEntryState::default(),
1622 format: InputFormat::default(),
1623 }
1624 }
1625}
1626
1627/// Inputs can be read from the filesystem or from in-memory buffers (which come, e.g., from
1628/// the REPL, the standard library, or the language server).
1629///
1630/// Inputs read from the filesystem get auto-refreshed: if we try to access them again and
1631/// the on-disk file has changed, we read it again. Inputs read from in-memory buffers
1632/// are not auto-refreshed. If an in-memory buffer has a path that also exists in the
1633/// filesystem, we will not even check that file to see if it has changed.
1634///
/// An input that was opened as an in-memory file may be closed, typically when the file is
/// closed or deleted in an editor using the LSP. In this case, the file will be read from the
/// filesystem again instead of using the in-memory value. Closing a file only makes sense when
/// the [SourcePath] refers to a path on the filesystem. Other types of in-memory files, like the
/// standard library, cannot be closed.
1640#[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Copy, Clone)]
1641enum SourceKind {
1642 Filesystem(SystemTime),
1643 Memory,
1644 MemoryClosed,
1645}
1646
/// The errors that can occur while closing an in-memory file.
1648#[derive(Debug, Clone)]
1649pub enum FileCloseError {
1650 /// The file was not closed because no mapping of the source path to a [FileId] could be
1651 /// found.
1652 FileIdNotFound,
1653 /// A file with the given path was found, but it was not open in memory.
1654 FileNotOpen,
1655}
1656
1657impl fmt::Display for FileCloseError {
1658 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1659 match &self {
1660 FileCloseError::FileIdNotFound => {
1661 write!(
1662 f,
1663 "No file ID could be found for the file path to be closed."
1664 )
1665 }
1666 FileCloseError::FileNotOpen => {
1667 write!(f, "Attempted to close a file that was not open in-memory.")
1668 }
1669 }
1670 }
1671}
1672
1673impl std::error::Error for FileCloseError {}
1674
1675/// Contains information about the closed in-memory file and its replacement from the filesystem
1676/// in the case that an in-memory file was closed successfully.
1677pub struct FileCloseResult {
1678 /// The [FileId] of the in-memory file that was closed.
1679 pub closed_id: FileId,
1680 /// The [FileId] of the file loaded from the filesystem with the same path as the closed
1681 /// file, or an error indicating why the file could not be opened.
    /// An error is expected here in the case that the file was deleted, since deletion in the
    /// editor also sends a close-file notification to the LSP.
1684 pub replacement_id: Result<FileId, io::Error>,
1685}
1686
1687/// Cache entries for sources.
1688///
/// A source can be either a snippet input by the user, in which case it is only identified by its
/// name in the name-id table and a unique `FileId`, or a file. Different versions of the same
/// file can coexist during the same session of the REPL. For this reason, an entry of the name-id
/// table for a file also stores the *modified at* timestamp, such that if a file is imported or
/// loaded again and has been modified in between, the entry is invalidated, the content is loaded
/// again and a new `FileId` is generated.
///
/// Note that in that case, invalidation just means that the `FileId` of a previous version is not
/// accessible anymore in the name-id table. However, terms that contain non-evaluated imports or
/// source locations referring to a previous version are still able to access the corresponding
/// source or term, which are kept respectively in `files` and `cache`, by using the corresponding
/// `FileId`.
1700#[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Copy, Clone)]
1701pub struct NameIdEntry {
1702 id: FileId,
1703 source: SourceKind,
1704}
1705
1706/// The state of an entry of the term cache.
1707///
1708/// # Imports
1709///
/// Usually, when applying a procedure to a term entry (e.g. program transformations), we process
/// all of its transitive imports as well. We start by processing the entry, updating the state to
/// `XXXing` (e.g. `Typechecking`) upon success. Only when all the imports have been successfully
/// processed is the state updated to `XXXed` (e.g. `Typechecked`).
1714#[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Copy, Clone, Default)]
1715pub enum TermEntryState {
1716 /// The initial state. The term is in the cache but hasn't been processed further yet.
1717 #[default]
1718 Populated,
1719 /// A custom transformation of the entry (through `Program::custom_transform`) is underway.
1720 CustomTransforming,
    /// This entry has completed all custom transformations with IDs up to and including
    /// `transform_id`.
1722 CustomTransformed { transform_id: usize },
1723 /// The imports of the entry have been resolved, and the imports of its (transitive) imports are
1724 /// being resolved.
1725 ImportsResolving,
    /// The imports of the entry and its transitive dependencies have been resolved.
1727 ImportsResolved,
    /// The entry has been transformed, and its (transitive) imports are being transformed.
1729 Transforming,
1730 /// The entry and its transitive imports have been transformed.
1731 Transformed,
1732 /// The entry has been closurized.
1733 Closurized,
1734}
1735
1736impl TermEntryState {
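    /// Returns `true` if the custom transformation with id `transform_id` still needs to be
    /// applied to an entry in this state: either no custom transformation has been applied yet,
    /// or only transformations with a strictly smaller id have been completed.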
1737 fn needs_custom_transform(&self, transform_id: usize) -> bool {
1738 if let TermEntryState::CustomTransformed {
1739 transform_id: done_transform_id,
1740 } = self
1741 {
1742 transform_id > *done_transform_id
1743 } else {
1744 *self < TermEntryState::CustomTransforming
1745 }
1746 }
1747}
1748
1749/// The state of an entry in the AST cache. Equivalent of [TermEntryState] but for ASTs.
1750#[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Copy, Clone, Default)]
1751pub enum AstEntryState {
1752 /// The initial state. The AST is in the cache but hasn't been processed further yet.
1753 #[default]
1754 Parsed,
    /// The entry has been typechecked, and its (transitive) imports are being typechecked.
1756 Typechecking,
1757 /// The entry and its transitive imports have been typechecked.
1758 Typechecked,
1759}
1760
/// The result of a cache operation, such as parsing, typechecking, etc., which either performed
/// actual work ([CacheOp::Done]) or did nothing because the corresponding entry was already at a
/// later stage ([CacheOp::Cached]).
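///
/// # Example
///
/// A minimal sketch: whether or not work was performed, [CacheOp::inner] extracts the payload.
///
/// ```rust,ignore
/// let op = CacheOp::Done(42);
/// assert_eq!(op.inner(), 42);
/// ```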
1764#[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Copy, Clone)]
1765pub enum CacheOp<T> {
1766 Done(T),
1767 Cached(T),
1768}
1769
1770impl<T> CacheOp<T> {
1771 pub fn inner(self: CacheOp<T>) -> T {
1772 match self {
1773 CacheOp::Done(t) | CacheOp::Cached(t) => t,
1774 }
1775 }
1776}
1777
1778/// Wrapper around other errors to indicate that typechecking or applying program transformations
1779/// failed because the source has not been parsed yet.
1780///
/// # Type parameters
1782///
1783/// - `E`: the underlying, wrapped error type
1784/// - `S`: the entry state, whether [TermEntryState] or [AstEntryState] in practice.
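///
/// # Example
///
/// A minimal sketch: [CacheError::map_err] transforms the wrapped error and leaves
/// [CacheError::IncompatibleState] untouched.
///
/// ```rust,ignore
/// let err: CacheError<&str, ()> = CacheError::Error("oops");
/// assert!(matches!(err.map_err(str::len), CacheError::Error(4)));
/// ```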
1785#[derive(Eq, PartialEq, Debug, Clone)]
1786pub enum CacheError<E, S> {
1787 Error(E),
1788 /// The state of the entry in the cache is incompatible with the requested operation.
1789 IncompatibleState {
1790 want: S,
1791 },
1792}
1793
1794pub type AstCacheError<E> = CacheError<E, AstEntryState>;
1795pub type TermCacheError<E> = CacheError<E, TermEntryState>;
1796
1797impl<E, S> From<E> for CacheError<E, S> {
1798 fn from(e: E) -> Self {
1799 CacheError::Error(e)
1800 }
1801}
1802
1803impl<E, S> CacheError<E, S> {
1804 #[track_caller]
1805 pub fn unwrap_error(self, msg: &str) -> E {
1806 match self {
1807 CacheError::Error(err) => err,
1808 CacheError::IncompatibleState { .. } => panic!("{}", msg),
1809 }
1810 }
1811
1812 pub fn map_err<O>(self, f: impl FnOnce(E) -> O) -> CacheError<O, S> {
1813 match self {
1814 CacheError::Error(e) => CacheError::Error(f(e)),
1815 CacheError::IncompatibleState { want } => CacheError::IncompatibleState { want },
1816 }
1817 }
1818
1819 /// Assuming that `self` is of the form `CacheError::Error(e)`, cast the error type to another
1820 /// arbitrary state type `T`.
1821 ///
1822 /// # Panic
1823 ///
1824 /// This method panics if `self` is [CacheError::IncompatibleState].
1825 #[track_caller]
1826 pub fn force_cast<T>(self) -> CacheError<E, T> {
1827 match self {
1828 CacheError::Error(e) => CacheError::Error(e),
            CacheError::IncompatibleState { want: _ } => {
                panic!("force_cast: unexpected CacheError::IncompatibleState")
            }
1830 }
1831 }
1832}
1833
1834/// Input data usually comes from files on the file system, but there are also lots of cases where
1835/// we want to synthesize other kinds of inputs.
1836///
1837/// Note that a [SourcePath] does not uniquely identify a cached input:
1838///
1839/// - Some functions (like [SourceCache::add_file]) add a new cached input unconditionally.
1840/// - [`SourceCache::get_or_add_file`] will add a new cached input at the same `SourcePath` if the file
1841/// on disk was updated.
1842///
1843/// The equality checking of `SourcePath` only affects [SourceCache::replace_string], which
1844/// overwrites any previous cached input with the same `SourcePath`.
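///
/// # Example
///
/// A minimal sketch of the most common variant, a Nickel file identified by its path (mirroring
/// the unit tests at the end of this module):
///
/// ```rust,ignore
/// let mut sources = SourceCache::new();
/// let path = SourcePath::Path(PathBuf::from("config.ncl"), InputFormat::Nickel);
/// // Registers (or overwrites) an in-memory input under this path.
/// let file_id = sources.replace_string(path, "{ answer = 42 }".to_owned());
/// ```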
1845#[derive(Debug, PartialEq, Eq, Hash, Clone)]
1846pub enum SourcePath {
1847 /// A file at the given path.
1848 ///
1849 /// Note that this does not need to be a real file on the filesystem: it could still be loaded
    /// from memory by, e.g., [`SourceCache::add_string`].
1851 ///
1852 /// This is the only `SourcePath` variant that can be resolved as the target of an import
1853 /// statement.
1854 Path(PathBuf, InputFormat),
1855 /// A subrange of a file at the given path.
1856 ///
1857 /// This is used by NLS to analyze small parts of files that don't fully parse. The original
1858 /// file path is preserved, because it's needed for resolving imports.
1859 Snippet(PathBuf),
1860 Std(StdlibModule),
1861 Query,
1862 ReplInput(usize),
1863 ReplTypecheck,
1864 ReplQuery,
1865 CliFieldAssignment,
1866 Override(FieldPath),
1867 Generated(String),
1868}
1869
1870impl<'a> TryFrom<&'a SourcePath> for &'a OsStr {
1871 type Error = ();
1872
1873 fn try_from(value: &'a SourcePath) -> Result<Self, Self::Error> {
1874 match value {
1875 SourcePath::Path(p, _) | SourcePath::Snippet(p) => Ok(p.as_os_str()),
1876 _ => Err(()),
1877 }
1878 }
1879}
1880
1881// [`Files`] needs to have an OsString for each file, so we synthesize names even for sources that
1882// don't have them. They don't need to be unique; they're just used for diagnostics.
1883impl From<SourcePath> for OsString {
1884 fn from(source_path: SourcePath) -> Self {
1885 match source_path {
1886 SourcePath::Path(p, _) | SourcePath::Snippet(p) => p.into(),
1887 SourcePath::Std(StdlibModule::Std) => "<stdlib/std.ncl>".into(),
1888 SourcePath::Std(StdlibModule::Internals) => "<stdlib/internals.ncl>".into(),
1889 SourcePath::Query => "<query>".into(),
1890 SourcePath::ReplInput(idx) => format!("<repl-input-{idx}>").into(),
1891 SourcePath::ReplTypecheck => "<repl-typecheck>".into(),
1892 SourcePath::ReplQuery => "<repl-query>".into(),
1893 SourcePath::CliFieldAssignment => "<cli-assignment>".into(),
1894 SourcePath::Override(path) => format!("<override {path}>",).into(),
1895 SourcePath::Generated(description) => format!("<generated {description}>").into(),
1896 }
1897 }
1898}
1899
/// Return status indicating whether an import has been resolved from a file (first encounter) or
/// was retrieved from the cache.
1902///
1903/// See [ImportResolver::resolve].
1904#[derive(Debug, PartialEq, Eq)]
1905pub enum ResolvedTerm {
1906 FromFile {
1907 path: PathBuf, /* the loaded path */
1908 },
1909 FromCache,
1910}
1911
1912#[derive(Copy, Clone, Debug, PartialEq, Eq)]
1913pub enum SourceState {
1914 UpToDate(FileId),
1915 /// The source is stale because it came from a file on disk that has since been updated. The
1916 /// data is the timestamp of the new version of the file.
1917 Stale(SystemTime),
1918}
1919
/// Abstracts the access to imported files and the import cache. Used by the evaluator and at the
/// [import resolution](crate::transform::import_resolution) phase.
///
/// The standard implementation uses two caches, the file cache for raw contents and the term
/// cache for parsed contents, mirroring the two steps of resolving an import:
1925///
1926/// 1. When an import is encountered for the first time, the content of the corresponding file is
1927/// read and stored in the file cache (consisting of the file database plus a map between paths
1928/// and ids in the database, the name-id table). The content is parsed, stored in the term
1929/// cache, and queued somewhere so that it can undergo the standard
1930/// [transformations](crate::transform) (including import resolution) later.
1931/// 2. When it is finally processed, the term cache is updated with the transformed term.
1932///
1933/// # RFC007
1934///
1935/// Import resolution on the old representation is still needed only because of the evaluator. The
1936/// typechecker now uses the new AST representation with its own import resolver.
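///
/// # Example
///
/// A minimal sketch of resolving a path import through [CacheHub], which implements this trait
/// (`cache: &mut CacheHub` and a position `pos: TermPos` are assumed to be in scope):
///
/// ```rust,ignore
/// let import = term::Import::Path {
///     path: "dep.ncl".into(),
///     format: InputFormat::Nickel,
/// };
/// // `None`: the import doesn't come from another file (e.g. REPL input).
/// let (status, file_id) = cache.resolve(&import, None, &pos)?;
/// ```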
1937pub trait ImportResolver {
1938 /// Resolves an import.
1939 ///
    /// Reads and stores the content of an import, puts it in the file cache (or gets it from
    /// there if it is cached), then parses it and returns the corresponding term and file id.
    ///
    /// The term and the path are provided only if the import is processed for the first time.
    /// Indeed, at import resolution phase, the term of an import encountered for the first time is
    /// queued to be processed (e.g. having its own imports resolved). The path is needed to
    /// resolve nested imports relative to this parent. Only after this processing is the term
    /// inserted back in the cache. On the other hand, if it has been resolved before, it is
    /// already transformed in the cache and does not need further processing.
1949 fn resolve(
1950 &mut self,
1951 import: &term::Import,
1952 parent: Option<FileId>,
1953 pos: &TermPos,
1954 ) -> Result<(ResolvedTerm, FileId), ImportError>;
1955
1956 /// Return a reference to the file database.
1957 fn files(&self) -> &Files;
1958
1959 /// Get a resolved import from the term cache.
1960 fn get(&self, file_id: FileId) -> Option<RichTerm>;
1961 /// Return the (potentially normalized) file path corresponding to the ID of a resolved import.
1962 fn get_path(&self, file_id: FileId) -> Option<&OsStr>;
1963
1964 /// Returns the base path for Nix evaluation, which is the parent directory of the source file
1965 /// if any, or the current working directory, or an empty path if we couldn't determine any of
1966 /// the previous two.
1967 ///
    /// This method needs to be here because the evaluator makes use of it (when evaluating the
    /// `eval_nix` primop), but at this stage it only has access to the `ImportResolver` interface.
    /// We could give a default implementation here just using [Self::get_path], but we also need
    /// `get_base_dir_for_nix` in [SourceCache]. We reuse the latter implementation instead of
    /// duplicating a more generic variant here.
1973 #[cfg(feature = "nix-experimental")]
1974 fn get_base_dir_for_nix(&self, file_id: FileId) -> PathBuf;
1975}
1976
1977impl ImportResolver for CacheHub {
1978 fn resolve(
1979 &mut self,
1980 import: &term::Import,
1981 parent: Option<FileId>,
1982 pos: &TermPos,
1983 ) -> Result<(ResolvedTerm, FileId), ImportError> {
1984 let (possible_parents, path, pkg_id, format) = match import {
1985 term::Import::Path { path, format } => {
1986 // `parent` is the file that did the import. We first look in its containing directory, followed by
1987 // the directories in the import path.
1988 let mut parent_path = parent
1989 .and_then(|p| self.get_path(p))
1990 .map(PathBuf::from)
1991 .unwrap_or_default();
1992 parent_path.pop();
1993
1994 (
1995 std::iter::once(parent_path)
1996 .chain(self.sources.import_paths.iter().cloned())
1997 .collect(),
1998 Path::new(path),
1999 None,
2000 *format,
2001 )
2002 }
2003 term::Import::Package { id } => {
2004 let package_map = self
2005 .sources
2006 .package_map
2007 .as_ref()
2008 .ok_or(ImportError::NoPackageMap { pos: *pos })?;
2009 let parent_path = parent
2010 .and_then(|p| self.sources.packages.get(&p))
2011 .map(PathBuf::as_path);
2012 let pkg_path = package_map.get(parent_path, *id, *pos)?;
2013 (
2014 vec![pkg_path.to_owned()],
2015 Path::new("main.ncl"),
2016 Some(pkg_path.to_owned()),
2017 // Packages are always in nickel format
2018 InputFormat::Nickel,
2019 )
2020 }
2021 };
2022
2023 // Try to import from all possibilities, taking the first one that succeeds.
2024 let (id_op, path_buf) = possible_parents
2025 .iter()
2026 .find_map(|parent| {
2027 let mut path_buf = parent.clone();
2028 path_buf.push(path);
2029 self.sources
2030 .get_or_add_file(&path_buf, format)
2031 .ok()
2032 .map(|x| (x, path_buf))
2033 })
2034 .ok_or_else(|| {
2035 let parents = possible_parents
2036 .iter()
2037 .map(|p| p.to_string_lossy())
2038 .collect::<Vec<_>>();
2039 ImportError::IOError(
2040 path.to_string_lossy().into_owned(),
2041 format!("could not find import (looked in [{}])", parents.join(", ")),
2042 *pos,
2043 )
2044 })?;
2045
2046 let (result, file_id) = match id_op {
2047 CacheOp::Cached(id) => (ResolvedTerm::FromCache, id),
2048 CacheOp::Done(id) => (ResolvedTerm::FromFile { path: path_buf }, id),
2049 };
2050
2051 if let Some(parent) = parent {
2052 self.import_data
2053 .imports
2054 .entry(parent)
2055 .or_default()
2056 .insert(ImportTarget { file_id, format });
2057 self.import_data
2058 .rev_imports
2059 .entry(file_id)
2060 .or_default()
2061 .entry(parent)
2062 .or_insert(*pos);
2063 }
2064
2065 self.parse_to_term(file_id, format)
2066 .map_err(|err| ImportError::ParseErrors(err, *pos))?;
2067
2068 if let Some(pkg_id) = pkg_id {
2069 self.sources.packages.insert(file_id, pkg_id);
2070 }
2071
2072 Ok((result, file_id))
2073 }
2074
2075 fn files(&self) -> &Files {
2076 &self.sources.files
2077 }
2078
2079 fn get(&self, file_id: FileId) -> Option<RichTerm> {
2080 self.terms
2081 .terms
2082 .get(&file_id)
2083 .map(|TermEntry { term, .. }| term.clone())
2084 }
2085
2086 fn get_path(&self, file_id: FileId) -> Option<&OsStr> {
2087 self.sources
2088 .file_paths
2089 .get(&file_id)
2090 .and_then(|p| p.try_into().ok())
2091 }
2092
2093 #[cfg(feature = "nix-experimental")]
2094 fn get_base_dir_for_nix(&self, file_id: FileId) -> PathBuf {
2095 self.sources.get_base_dir_for_nix(file_id)
2096 }
2097}
2098
2099/// Import resolution for new AST representation (RFC007).
2100pub trait AstImportResolver {
2101 /// Resolves an import to an AST.
2102 ///
2103 /// Reads and stores the content of an import, puts it in the file cache (or gets it from there
2104 /// if it is cached), then parses it and returns the corresponding term and file id.
2105 ///
2106 /// The term and the path are provided only if the import is processed for the first time.
2107 /// Indeed, at import resolution phase, the term of an import encountered for the first time is
2108 /// queued to be processed (e.g. having its own imports resolved). The path is needed to
    /// resolve nested imports relative to this parent. Only after this processing is the term
    /// inserted back in the cache. On the other hand, if it has been resolved before, it is
    /// already transformed in the cache and does not need further processing.
2112 ///
2113 /// # Returns
2114 ///
    /// [Self::resolve] returns `Ok(None)` if the import is in an external format, which can
    /// currently be serialized directly to the runtime representation
    /// ([crate::bytecode::value::NickelValue])
2117 /// without going through an AST. AST import resolution is mostly used by the typechecker, and
2118 /// the typechecker currently ignores external formats anyway.
2119 ///
2120 /// # Lifetimes
2121 ///
2122 /// The signature is parametrized by two different lifetimes. This is due mostly to NLS: in the
2123 /// normal Nickel pipeline, all the ASTs are currently allocated in the same arena, and their
    /// lifetime is the same. However, in NLS, each file needs to be managed separately. At the
2125 /// import boundary, we're thus not guaranteed to get an AST that lives as long as the one
2126 /// being currently typechecked.
2127 fn resolve<'ast_out>(
2128 &'ast_out mut self,
2129 import: &ast::Import<'_>,
2130 pos: &TermPos,
2131 ) -> Result<Option<&'ast_out Ast<'ast_out>>, ImportError>;
2132}
2133
2134/// Normalize the path of a file for unique identification in the cache.
2135///
2136/// The returned path will be an absolute path.
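///
/// # Example
///
/// A minimal sketch: a relative path is first joined to the current working directory (assumed
/// to be `/home/user` here), then normalized.
///
/// ```rust,ignore
/// // Assuming the current working directory is /home/user:
/// assert_eq!(
///     normalize_path("nickel/../config.ncl")?,
///     PathBuf::from("/home/user/config.ncl")
/// );
/// ```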
2137pub fn normalize_path(path: impl Into<PathBuf>) -> std::io::Result<PathBuf> {
2138 let mut path = path.into();
2139 if path.is_relative() {
2140 path = std::env::current_dir()?.join(path);
2141 }
2142 Ok(normalize_abs_path(&path))
2143}
2144
2145/// Normalize the path (assumed to be absolute) of a file for unique identification in the cache.
2146///
2147/// This implementation (including the comment below) was taken from cargo-util.
2148///
2149/// CAUTION: This does not resolve symlinks (unlike [`std::fs::canonicalize`]). This may cause
2150/// incorrect or surprising behavior at times. This should be used carefully. Unfortunately,
2151/// [`std::fs::canonicalize`] can be hard to use correctly, since it can often fail, or on Windows
2152/// returns annoying device paths. This is a problem Cargo needs to improve on.
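///
/// # Example
///
/// A minimal sketch: `.` components are dropped and `..` components pop their parent, without
/// touching the filesystem.
///
/// ```rust,ignore
/// assert_eq!(
///     normalize_abs_path(Path::new("/a/./b/../c.ncl")),
///     PathBuf::from("/a/c.ncl")
/// );
/// ```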
2153pub fn normalize_abs_path(path: &Path) -> PathBuf {
2154 use std::path::Component;
2155
2156 let mut components = path.components().peekable();
2157 let mut ret = if let Some(c @ Component::Prefix(..)) = components.peek().cloned() {
2158 components.next();
2159 PathBuf::from(c.as_os_str())
2160 } else {
2161 PathBuf::new()
2162 };
2163
2164 for component in components {
2165 match component {
2166 Component::Prefix(..) => unreachable!(),
2167 Component::RootDir => {
2168 ret.push(component.as_os_str());
2169 }
2170 Component::CurDir => {}
2171 Component::ParentDir => {
2172 ret.pop();
2173 }
2174 Component::Normal(c) => {
2175 ret.push(c);
2176 }
2177 }
2178 }
2179 ret
2180}
2181
2182/// Normalize a relative path, removing mid-path `..`s.
2183///
2184/// Like [`normalize_abs_path`], this works only on the path itself (i.e. not the filesystem) and
2185/// does not follow symlinks.
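///
/// # Example
///
/// Mirroring the unit tests at the end of this module: only mid-path `..`s are removed, while
/// leading ones are preserved.
///
/// ```rust,ignore
/// assert_eq!(normalize_rel_path(Path::new("../a/../b")), PathBuf::from("../b"));
/// ```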
2186pub fn normalize_rel_path(path: &Path) -> PathBuf {
2187 use std::path::Component;
2188
2189 let mut components = path.components().peekable();
2190 let mut parents = PathBuf::new();
2191 let mut ret = if let Some(c @ Component::Prefix(..)) = components.peek().cloned() {
2192 components.next();
2193 PathBuf::from(c.as_os_str())
2194 } else {
2195 PathBuf::new()
2196 };
2197
2198 for component in components {
2199 match component {
2200 Component::Prefix(..) => unreachable!(),
2201 Component::RootDir => {
2202 ret.push(component.as_os_str());
2203 }
2204 Component::CurDir => {}
2205 Component::ParentDir => {
2206 if !ret.pop() {
2207 parents.push(Component::ParentDir);
2208 }
2209 }
2210 Component::Normal(c) => {
2211 ret.push(c);
2212 }
2213 }
2214 }
2215 parents.extend(ret.components());
2216 parents
2217}
2218
/// Returns the timestamp of a file, or an error if an IO error occurred.
2220pub fn timestamp(path: impl AsRef<OsStr>) -> io::Result<SystemTime> {
2221 fs::metadata(path.as_ref())?.modified()
2222}
2223
2224/// As RFC007 is being rolled out, the typechecker now needs to operate on the new AST. We need a
2225/// structure that implements [AstImportResolver].
2226///
2227/// For borrowing reasons, this can't be all of [CacheHub] or all of [ast_cache::AstCache], as we
2228/// need to split the different things that are borrowed mutably or immutably. `AstResolver` is a
/// structure that borrows some parts of the cache during its lifetime and will retrieve already
2230/// imported ASTs, or register the newly imported ones in a separate hashmap that can be added back
2231/// to the original cache once import resolution is done.
2232pub struct AstResolver<'ast, 'cache> {
2233 /// The AST allocator used to parse new sources.
2234 alloc: &'ast AstAlloc,
2235 /// The AST cache, which is added to as import resolution progresses.
2236 asts: &'cache mut HashMap<FileId, AstEntry<'ast>>,
2237 /// The source cache where new sources will be stored.
2238 sources: &'cache mut SourceCache,
2239 /// Direct and reverse dependencies of files (with respect to imports).
2240 import_data: &'cache mut ImportData,
2241}
2242
2243impl<'ast, 'cache> AstResolver<'ast, 'cache> {
2244 /// Create a new `AstResolver` from an allocator, an ast cache and a cache hub slice.
2245 pub fn new(
2246 alloc: &'ast AstAlloc,
2247 asts: &'cache mut HashMap<FileId, AstEntry<'ast>>,
2248 slice: CacheHubView<'cache>,
2249 ) -> Self {
2250 Self {
2251 alloc,
2252 asts,
2253 sources: slice.sources,
2254 import_data: slice.import_data,
2255 }
2256 }
2257}
2258
2259impl AstImportResolver for AstResolver<'_, '_> {
2260 fn resolve(
2261 &mut self,
2262 import: &ast::Import<'_>,
2263 pos: &TermPos,
2264 ) -> Result<Option<&Ast<'_>>, ImportError> {
2265 let parent_id = pos.src_id();
2266
2267 let (possible_parents, path, pkg_id, format) = match import {
2268 ast::Import::Path { path, format } => {
2269 // `parent` is the file that did the import. We first look in its containing
2270 // directory, followed by the directories in the import path.
2271 let parent_path = parent_id
2272 .and_then(|parent| self.sources.file_paths.get(&parent))
2273 .and_then(|path| <&OsStr>::try_from(path).ok())
2274 .map(PathBuf::from)
2275 .map(|mut path| {
2276 path.pop();
2277 path
2278 })
2279 // If the parent isn't a proper file, we look in the current directory instead.
2280 // This is useful when importing e.g. from the REPL or the CLI directly.
2281 .unwrap_or_default();
2282
2283 (
2284 std::iter::once(parent_path)
2285 .chain(self.sources.import_paths.iter().cloned())
2286 .collect(),
2287 Path::new(path),
2288 None,
2289 *format,
2290 )
2291 }
2292 ast::Import::Package { id } => {
2293 let package_map = self
2294 .sources
2295 .package_map
2296 .as_ref()
2297 .ok_or(ImportError::NoPackageMap { pos: *pos })?;
2298 let parent_path = parent_id
2299 .and_then(|p| self.sources.packages.get(&p))
2300 .map(PathBuf::as_path);
2301 let pkg_path = package_map.get(parent_path, *id, *pos)?;
2302 (
2303 vec![pkg_path.to_owned()],
2304 Path::new("main.ncl"),
2305 Some(pkg_path.to_owned()),
2306 // Packages are always in nickel format
2307 InputFormat::Nickel,
2308 )
2309 }
2310 };
2311
2312 // Try to import from all possibilities, taking the first one that succeeds.
2313 let id_op = possible_parents
2314 .iter()
2315 .find_map(|parent| {
2316 let mut path_buf = parent.clone();
2317 path_buf.push(path);
2318 self.sources.get_or_add_file(&path_buf, format).ok()
2319 })
2320 .ok_or_else(|| {
2321 let parents = possible_parents
2322 .iter()
2323 .map(|p| p.to_string_lossy())
2324 .collect::<Vec<_>>();
2325 ImportError::IOError(
2326 path.to_string_lossy().into_owned(),
2327 format!("could not find import (looked in [{}])", parents.join(", ")),
2328 *pos,
2329 )
2330 })?;
2331
2332 let file_id = id_op.inner();
2333
2334 if let Some(parent_id) = parent_id {
2335 self.import_data
2336 .imports
2337 .entry(parent_id)
2338 .or_default()
2339 .insert(ImportTarget { file_id, format });
2340 self.import_data
2341 .rev_imports
2342 .entry(file_id)
2343 .or_default()
2344 .entry(parent_id)
2345 .or_insert(*pos);
2346 }
2347
2348 if let Some(pkg_id) = pkg_id {
2349 self.sources.packages.insert(file_id, pkg_id);
2350 }
2351
2352 if let InputFormat::Nickel = format {
2353 if let Some(entry) = self.asts.get(&file_id) {
2354 Ok(Some(entry.ast))
2355 } else {
2356 let ast = parse_nickel(self.alloc, file_id, self.sources.files.source(file_id))
2357 .map_err(|parse_err| ImportError::ParseErrors(parse_err, *pos))?;
2358 let ast = self.alloc.alloc(ast);
2359 self.asts.insert(file_id, AstEntry::new(ast));
2360
2361 Ok(Some(ast))
2362 }
2363 } else {
            // Currently, non-Nickel files are just ignored at the AST stage. They are parsed
            // later directly into the runtime representation.
2366 Ok(None)
2367 }
2368 }
2369}
2370
2371/// Provide mockup import resolvers for testing purpose.
2372pub mod resolvers {
2373 use super::*;
2374 use crate::term::Import;
2375
2376 /// A dummy resolver that panics when asked to do something. Used to test code that contains no
2377 /// import.
2378 pub struct DummyResolver {}
2379
2380 impl ImportResolver for DummyResolver {
2381 fn resolve(
2382 &mut self,
2383 _import: &Import,
2384 _parent: Option<FileId>,
2385 _pos: &TermPos,
2386 ) -> Result<(ResolvedTerm, FileId), ImportError> {
2387 panic!("cache::resolvers: dummy resolver should not have been invoked");
2388 }
2389
2390 fn files(&self) -> &Files {
2391 panic!("cache::resolvers: dummy resolver should not have been invoked");
2392 }
2393
2394 fn get(&self, _file_id: FileId) -> Option<RichTerm> {
2395 panic!("cache::resolvers: dummy resolver should not have been invoked");
2396 }
2397
2398 fn get_path(&self, _file_id: FileId) -> Option<&OsStr> {
2399 panic!("cache::resolvers: dummy resolver should not have been invoked");
2400 }
2401
2402 #[cfg(feature = "nix-experimental")]
2403 fn get_base_dir_for_nix(&self, _file_id: FileId) -> PathBuf {
2404 panic!("cache::resolvers: dummy resolver should not have been invoked");
2405 }
2406 }
2407
2408 /// Resolve imports from a mockup file database. Used to test imports without accessing the
    /// file system. File names are stored as strings, and silently converted from/to `OsString`
    /// when needed: don't use this resolver with source code that imports non-UTF-8 paths.
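    ///
    /// # Example
    ///
    /// A minimal sketch of setting up mockup imports for a test:
    ///
    /// ```rust,ignore
    /// let mut resolver = SimpleResolver::new();
    /// resolver.add_source("dep.ncl".to_owned(), "1 + 1".to_owned());
    /// // `resolver` can now serve `import "dep.ncl"` to code under test.
    /// ```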
2411 #[derive(Clone, Default)]
2412 pub struct SimpleResolver {
2413 files: Files,
2414 file_cache: HashMap<String, FileId>,
2415 term_cache: HashMap<FileId, RichTerm>,
2416 }
2417
2418 impl SimpleResolver {
2419 pub fn new() -> SimpleResolver {
2420 SimpleResolver::default()
2421 }
2422
2423 /// Add a mockup file to available imports.
2424 pub fn add_source(&mut self, name: String, source: String) {
2425 let id = self.files.add(name.clone(), source);
2426 self.file_cache.insert(name, id);
2427 }
2428 }
2429
2430 impl ImportResolver for SimpleResolver {
2431 fn resolve(
2432 &mut self,
2433 import: &Import,
2434 _parent: Option<FileId>,
2435 pos: &TermPos,
2436 ) -> Result<(ResolvedTerm, FileId), ImportError> {
2437 let Import::Path { path, .. } = import else {
2438 panic!("simple resolver doesn't support packages");
2439 };
2440
2441 let file_id = self
2442 .file_cache
2443 .get(path.to_string_lossy().as_ref())
2444 .copied()
2445 .ok_or_else(|| {
2446 ImportError::IOError(
2447 path.to_string_lossy().into_owned(),
2448 String::from("Import not found by the mockup resolver."),
2449 *pos,
2450 )
2451 })?;
2452
2453 if let hash_map::Entry::Vacant(e) = self.term_cache.entry(file_id) {
2454 let buf = self.files.source(file_id);
2455 let alloc = AstAlloc::new();
2456
2457 let ast = parser::grammar::TermParser::new()
2458 .parse_strict(&alloc, file_id, Lexer::new(buf))
2459 .map_err(|e| ImportError::ParseErrors(e, *pos))?;
2460 e.insert(ast.to_mainline());
2461
2462 Ok((
2463 ResolvedTerm::FromFile {
2464 path: PathBuf::new(),
2465 },
2466 file_id,
2467 ))
2468 } else {
2469 Ok((ResolvedTerm::FromCache, file_id))
2470 }
2471 }
2472
2473 fn files(&self) -> &Files {
2474 &self.files
2475 }
2476
2477 fn get(&self, file_id: FileId) -> Option<RichTerm> {
2478 self.term_cache.get(&file_id).cloned()
2479 }
2480
2481 fn get_path(&self, file_id: FileId) -> Option<&OsStr> {
2482 Some(self.files.name(file_id))
2483 }
2484
2485 #[cfg(feature = "nix-experimental")]
2486 fn get_base_dir_for_nix(&self, file_id: FileId) -> PathBuf {
2487 self.get_path(file_id)
2488 .and_then(|path| Path::new(path).parent())
2489 .map(PathBuf::from)
2490 .unwrap_or_default()
2491 }
2492 }
2493}
2494
2495/// Parses a Nickel expression from a string.
2496fn parse_nickel<'ast>(
2497 alloc: &'ast AstAlloc,
2498 file_id: FileId,
2499 source: &str,
2500) -> Result<Ast<'ast>, ParseErrors> {
2501 let ast = measure_runtime!(
2502 "runtime:parse:nickel",
2503 parser::grammar::TermParser::new().parse_strict(alloc, file_id, Lexer::new(source))?
2504 );
2505
2506 Ok(ast)
2507}
2508
/// Parses a Nickel REPL input. In addition to normal Nickel expressions, it can be a top-level let.
2510fn parse_nickel_repl<'ast>(
2511 alloc: &'ast AstAlloc,
2512 file_id: FileId,
2513 source: &str,
2514) -> Result<ExtendedTerm<Ast<'ast>>, ParseErrors> {
2515 let et = measure_runtime!(
2516 "runtime:parse:nickel",
2517 parser::grammar::ExtendedTermParser::new().parse_strict(
2518 alloc,
2519 file_id,
2520 Lexer::new(source)
2521 )?
2522 );
2523
2524 Ok(et)
2525}
2526
2527/// AST cache (for the new [crate::bytecode::ast::Ast]) that holds the owned allocator of the AST
2528/// nodes.
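///
/// # Example
///
/// A minimal sketch of parsing a source into the cache (assuming a valid `file_id` registered in
/// the source cache):
///
/// ```rust,ignore
/// let mut asts = AstCache::empty();
/// let ast = asts.parse_nickel(file_id, "1 + 1")?;
/// ```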
2529mod ast_cache {
2530 use super::*;
2531 /// The AST cache packing together the AST allocator and the cached ASTs.
2532 #[self_referencing]
2533 pub struct AstCache {
2534 /// The allocator hosting AST nodes.
2535 alloc: AstAlloc,
2536 /// An AST for each file we have cached.
2537 #[borrows(alloc)]
2538 #[covariant]
2539 asts: HashMap<FileId, AstEntry<'this>>,
        /// The initial typing context. It's morally an option (uninitialized at first), but we just
2541 /// use an empty context as a default value.
2542 ///
        /// This context can be augmented through [AstCache::add_type_binding] and
        /// [AstCache::add_type_bindings], which are typically used in the REPL to add top-level
        /// bindings.
2546 #[borrows(alloc)]
2547 #[not_covariant]
2548 type_ctxt: typecheck::Context<'this>,
2549 }
2550
2551 impl AstCache {
        /// Constructs a new, empty AST cache.
2553 pub fn empty() -> Self {
2554 AstCache::new(
2555 AstAlloc::new(),
2556 |_alloc| HashMap::new(),
2557 |_alloc| typecheck::Context::new(),
2558 )
2559 }
2560
2561 /// Clears the allocator and the cached ASTs.
2562 pub fn clear(&mut self) {
2563 *self = Self::empty();
2564 }
2565
2566 /// Returns `true` if the AST cache contains an entry for the given file id.
2567 pub fn contains(&self, file_id: FileId) -> bool {
2568 self.borrow_asts().contains_key(&file_id)
2569 }
2570
2571 /// Returns the underlying allocator, which might be required to call various helpers.
2572 pub fn get_alloc(&self) -> &AstAlloc {
2573 self.borrow_alloc()
2574 }
2575
2576 /// Returns a reference to a cached AST.
2577 pub fn get(&self, file_id: FileId) -> Option<&Ast<'_>> {
2578 self.borrow_asts().get(&file_id).map(|entry| entry.ast)
2579 }
2580
2581 /// Returns a reference to a cached AST entry.
2582 pub fn get_entry(&self, file_id: FileId) -> Option<&AstEntry<'_>> {
2583 self.borrow_asts().get(&file_id)
2584 }
2585
2586 /// Retrieves the state of an entry. Returns `None` if the entry is not in the AST cache.
2587 pub fn entry_state(&self, file_id: FileId) -> Option<AstEntryState> {
2588 self.borrow_asts()
2589 .get(&file_id)
2590 .map(|AstEntry { state, .. }| *state)
2591 }
2592
2593 /// Updates the state of an entry and returns the previous state, or an error if the entry
2594 /// isn't in the cache.
2595 pub fn update_state(
2596 &mut self,
2597 file_id: FileId,
2598 new: AstEntryState,
2599 ) -> Result<AstEntryState, TermNotFound> {
2600 self.with_asts_mut(|asts| {
2601 asts.get_mut(&file_id)
2602 .map(|AstEntry { state, .. }| std::mem::replace(state, new))
2603 })
2604 .ok_or(TermNotFound)
2605 }
2606
2607 /// Parses a Nickel expression and stores the corresponding AST in the cache.
2608 pub fn parse_nickel<'ast>(
2609 &'ast mut self,
2610 file_id: FileId,
2611 source: &str,
2612 ) -> Result<&'ast Ast<'ast>, ParseErrors> {
2613 self.with_mut(|slf| {
2614 let ast = parse_nickel(slf.alloc, file_id, source)?;
2615 let ast = slf.alloc.alloc(ast);
2616 slf.asts.insert(file_id, AstEntry::new(ast));
2617
2618 Ok(ast)
2619 })
2620 }
2621
2622 /// Same as [Self::parse_nickel] but accepts the extended syntax allowed in the REPL.
2623 ///
2624 /// **Caution**: this method doesn't cache the potential id of a top-level let binding,
2625 /// although it does save the bound expression, which is required later for typechecking,
2626 /// program transformation, etc.
2627 pub fn parse_nickel_repl<'ast>(
2628 &'ast mut self,
2629 file_id: FileId,
2630 source: &str,
2631 ) -> Result<ExtendedTerm<Ast<'ast>>, ParseErrors> {
2632 self.with_mut(|slf| {
2633 let extd_ast = parse_nickel_repl(slf.alloc, file_id, source)?;
2634
2635 let ast = match &extd_ast {
2636 ExtendedTerm::Term(t) | ExtendedTerm::ToplevelLet(_, t) => {
2637 slf.alloc.alloc(t.clone())
2638 }
2639 };
2640
2641 slf.asts.insert(file_id, AstEntry::new(ast));
2642
2643 Ok(extd_ast)
2644 })
2645 }
2646
2647 pub fn remove(&mut self, file_id: FileId) -> Option<AstEntry<'_>> {
2648 self.with_asts_mut(|asts| asts.remove(&file_id))
2649 }
2650
2651 /// Typechecks an entry of the cache and updates its state accordingly, or does nothing if
        /// parsed. Note that this method currently fails on non-Nickel files, which can't have
        /// been parsed to an AST.
2654 /// parsed to an AST.
2655 ///
        /// If the source contains imports, recursively typechecks the imports too.
2657 ///
2658 /// # RFC007
2659 ///
2660 /// During the transition period between the old VM and the new bytecode VM, this method
2661 /// performs typechecking on the new representation [crate::bytecode::ast::Ast], and is also
        /// responsible for then converting the term to the legacy representation and populating the
2663 /// corresponding term cache.
2664 pub fn typecheck(
2665 &mut self,
2666 mut slice: CacheHubView<'_>,
2667 file_id: FileId,
2668 initial_mode: TypecheckMode,
2669 ) -> Result<CacheOp<()>, AstCacheError<TypecheckError>> {
2670 let Some(state) = self.entry_state(file_id) else {
2671 return Err(CacheError::IncompatibleState {
2672 want: AstEntryState::Parsed,
2673 });
2674 };
2675
2676 // If we're already typechecking or we have typechecked the file, we stop right here.
2677 if state >= AstEntryState::Typechecking {
2678 return Ok(CacheOp::Cached(()));
2679 }
2680
2681 // Protect against cycles in the import graph.
2682 // unwrap(): we checked at the beginning of this function that the term is in the
2683 // cache.
2684 let _ = self
2685 .update_state(file_id, AstEntryState::Typechecking)
2686 .unwrap();
2687
2688 // Ensure the initial typing context is properly initialized.
2689 self.populate_type_ctxt(slice.sources);
2690 self.with_mut(|slf| -> Result<(), AstCacheError<TypecheckError>> {
2691 // unwrap(): we checked at the beginning of this function that the AST cache has an
2692 // entry for `file_id`.
2693 let ast = slf.asts.get(&file_id).unwrap().ast;
2694
2695 let mut resolver = AstResolver::new(slf.alloc, slf.asts, slice.reborrow());
2696 let type_ctxt = slf.type_ctxt.clone();
2697 let wildcards_map = measure_runtime!(
2698 "runtime:type_check",
2699 typecheck(slf.alloc, ast, type_ctxt, &mut resolver, initial_mode)?
2700 );
2701 slice.wildcards.wildcards.insert(
2702 file_id,
2703 wildcards_map.iter().map(ToMainline::to_mainline).collect(),
2704 );
2705 Ok(())
2706 })?;
2707
2708 // Typecheck dependencies (files imported by this file).
2709 if let Some(imports) = slice.import_data.imports.get(&file_id) {
2710 // Because we need to borrow `import_data` for typechecking, we need to release the
2711 // borrow by moving the content of `imports` somewhere else.
2712 //
2713 // We ignore non-Nickel imports, which aren't typechecked, and are currently not
2714 // even in the AST cache.
2715 let imports: Vec<_> = imports
2716 .iter()
2717 .filter_map(|tgt| {
2718 if let InputFormat::Nickel = tgt.format {
2719 Some(tgt.file_id)
2720 } else {
2721 None
2722 }
2723 })
2724 .collect();
2725
2726 for file_id in imports {
2727 self.typecheck(slice.reborrow(), file_id, initial_mode)?;
2728 }
2729 }
2730
2731 // unwrap(): we checked at the beginning of this function that the AST is in the
2732 // cache.
2733 let _ = self
2734 .update_state(file_id, AstEntryState::Typechecked)
2735 .unwrap();
2736
2737 Ok(CacheOp::Done(()))
2738 }
2739
        /// Typechecks the stdlib. This has to be public because it's used in benches. It probably
        /// shouldn't be used for anything else.
2742 pub fn typecheck_stdlib(
2743 &mut self,
2744 mut slice: CacheHubView<'_>,
2745 ) -> Result<CacheOp<()>, AstCacheError<TypecheckError>> {
2746 let mut ret = CacheOp::Cached(());
2747 self.populate_type_ctxt(slice.sources);
2748
2749 for (_, stdlib_module_id) in slice.sources.stdlib_modules() {
2750 let result =
2751 self.typecheck(slice.reborrow(), stdlib_module_id, TypecheckMode::Walk)?;
2752
2753 if let CacheOp::Done(()) = result {
2754 ret = CacheOp::Done(());
2755 }
2756 }
2757
2758 Ok(ret)
2759 }
2760
2761 /// Typechecks a file (if it wasn't already) and returns the inferred type, with type
2762 /// wildcards properly substituted.
2763 pub fn type_of(
2764 &mut self,
2765 mut slice: CacheHubView<'_>,
2766 file_id: FileId,
2767 ) -> Result<CacheOp<mainline_typ::Type>, AstCacheError<TypecheckError>> {
2768 self.typecheck(slice.reborrow(), file_id, TypecheckMode::Walk)?;
2769
2770 let typ: Result<ast::typ::Type<'_>, AstCacheError<TypecheckError>> =
2771 self.with_mut(|slf| {
2772 let ast = slf
2773 .asts
2774 .get(&file_id)
2775 .ok_or(CacheError::IncompatibleState {
2776 want: AstEntryState::Parsed,
2777 })?
2778 .ast;
2779
2780 let mut resolver = AstResolver::new(slf.alloc, slf.asts, slice.reborrow());
2781 let type_ctxt = slf.type_ctxt.clone();
2782
2783 let typ = TryConvert::try_convert(
2784 slf.alloc,
2785 ast.apparent_type(
2786 slf.alloc,
2787 Some(&type_ctxt.type_env),
2788 Some(&mut resolver),
2789 ),
2790 )
2791 .unwrap_or(ast::typ::TypeF::Dyn.into());
2792 Ok(typ)
2793 });
2794 let typ = typ?;
2795
2796 let target: mainline_typ::Type = typ.to_mainline();
2797
2798 // unwrap(): we ensured that the file is typechecked, thus its wildcards and its AST
2799 // must be populated
2800 let wildcards = slice.wildcards.get(file_id).unwrap();
2801
2802 Ok(CacheOp::Done(
2803 target
2804 .traverse(
2805 &mut |ty: mainline_typ::Type| -> Result<_, std::convert::Infallible> {
2806 if let mainline_typ::TypeF::Wildcard(id) = ty.typ {
2807 Ok(wildcards
2808 .get(id)
2809 .cloned()
2810 .unwrap_or(mainline_typ::Type::from(mainline_typ::TypeF::Dyn)))
2811 } else {
2812 Ok(ty)
2813 }
2814 },
2815 TraverseOrder::TopDown,
2816 )
2817 .unwrap(),
2818 ))
2819 }
2820
2821 /// If the type context hasn't been created yet, generate and cache the initial typing
2822 /// context from the list of `file_ids` corresponding to the standard library parts.
2823 /// Otherwise, do nothing.
2824 fn populate_type_ctxt(&mut self, sources: &SourceCache) {
2825 self.with_mut(|slf| {
2826 if !slf.type_ctxt.is_empty() {
2827 return;
2828 }
2829 let stdlib_terms_vec: Vec<(StdlibModule, &'_ Ast<'_>)> = sources
2830 .stdlib_modules()
2831 .map(|(module, file_id)| {
2832 let ast = slf.asts.get(&file_id).map(|entry| entry.ast);
2833
2834 (
2835 module,
2836 ast.expect("cache::ast_cache::AstCache::populate_type_ctxt(): can't build environment, stdlib not parsed")
2837 )
2838 })
2839 .collect();
2840
2841 *slf.type_ctxt = typecheck::mk_initial_ctxt(slf.alloc, stdlib_terms_vec).unwrap();
2842 });
2843 }
2844
2845 /// Adds a binding to the type environment. The bound term is identified by its file id
2846 /// `file_id`.
2847 pub fn add_type_binding(
2848 &mut self,
2849 mut slice: CacheHubView<'_>,
2850 id: LocIdent,
2851 file_id: FileId,
2852 ) -> Result<(), AstCacheError<std::convert::Infallible>> {
2853 self.with_mut(|slf| {
2854 let Some(entry) = slf.asts.get(&file_id) else {
2855 return Err(CacheError::IncompatibleState {
2856 want: AstEntryState::Parsed,
2857 });
2858 };
2859
2860 let ast = entry.ast;
2861 let mut resolver = AstResolver::new(slf.alloc, slf.asts, slice.reborrow());
2862
2863 typecheck::env_add(
2864 slf.alloc,
2865 &mut slf.type_ctxt.type_env,
2866 id,
2867 ast,
2868 &slf.type_ctxt.term_env,
2869 &mut resolver,
2870 );
2872
2873 slf.type_ctxt
2874 .term_env
2875 .0
2876 .insert(id.ident(), (ast.clone(), slf.type_ctxt.term_env.clone()));
2877 Ok(())
2878 })?;
2879
2880 Ok(())
2881 }
2882
        /// Adds the bindings of a record to the type environment. Ignores fields whose names are
        /// defined through interpolation.
2885 pub fn add_type_bindings(
2886 &mut self,
2887 mut slice: CacheHubView<'_>,
2888 term: &RichTerm,
2889 ) -> Result<(), NotARecord> {
2890 self.with_mut(|slf| {
2891 // It's sad, but for now, we have to convert the term back to an AST to insert it in
2892 // the type environment.
2893 let ast = term.to_ast(slf.alloc);
2894 let mut resolver = AstResolver::new(slf.alloc, slf.asts, slice.reborrow());
2895
2896 let ret = typecheck::env_add_term(
2897 slf.alloc,
2898 &mut slf.type_ctxt.type_env,
2899 ast,
2900 &slf.type_ctxt.term_env,
2901 &mut resolver,
2902 )
2903 .map_err(|_| NotARecord);
2904 ret
2905 })
2906 }
2907 }
2908}
2909
2910#[cfg(test)]
2911mod tests {
2912 use std::path::Path;
2913
2914 use super::*;
2915
2916 #[test]
2917 fn normalize_rel() {
2918 assert_eq!(
2919 &normalize_rel_path(Path::new("../a/../b")),
2920 Path::new("../b")
2921 );
2922 assert_eq!(
2923 &normalize_rel_path(Path::new("../../a/../b")),
2924 Path::new("../../b")
2925 );
2926 }
2927
2928 #[test]
2929 fn get_cached_source_with_relative_path() {
2930 let mut sources = SourceCache::new();
2931 let root_path = PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("nickel-test-rootdir");
2932 let path = SourcePath::Path(root_path.join("file.ncl"), super::InputFormat::Nickel);
2933 let file_id = sources.replace_string(path, "1".into());
2934
        // This path should not exist on the host, but it should match the in-memory file that
        // was set up in the cache.
2937 let file = sources
2938 .get_or_add_file(
2939 root_path.join("subdir").join("..").join("file.ncl"),
2940 InputFormat::Nickel,
2941 )
2942 .expect("Missed cached file when pulling with relative path");
2943 assert_eq!(CacheOp::Cached(file_id), file);
2944 }
2945
2946 #[test]
2947 fn close_file() {
2948 let mut sources = SourceCache::new();
2949 let path = PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("closed.ncl");
2950 let source_path = SourcePath::Path(path.clone(), InputFormat::Nickel);
2951 sources.add_string(source_path.clone(), "1".to_string());
2952 sources
2953 .close_in_memory_file(path.clone(), InputFormat::Nickel)
2954 .unwrap();
2955 assert_eq!(
2956 sources
2957 .file_ids
2958 .get(&source_path)
2959 .map(|it| it.source)
2960 .unwrap(),
2961 SourceKind::MemoryClosed
2962 );
2963
2964 // Since the closed file should be stale, id_or_new_timestamp_of should not return the
2965 // file ID for the closed file. Since in this case the file doesn't exist on the
2966 // filesystem, it should return an error.
2967 assert!(sources
2968 .id_or_new_timestamp_of(&path, InputFormat::Nickel)
2969 .is_err());
2970 }
2971}