nickel_lang_core/closurize.rs
1//! Closurization of terms.
2//!
3//! During evaluation, variable substitution is done lazily through the help of
4//! [the environment][crate::eval::Environment]. In consequence, the complete data required to
5//! evaluate an expression further isn't only the expression itself (the term), but the current
6//! environment as well. [crate::eval::Closure] is precisely a tuple of a term together with its
7//! environment.
8//!
9//! We often need to store a closure back into a term (without relying on the environment):
10//! typically, when we combine several operands (think merging or array concatenation), each with
11//! its own environment. This is what we call closurization: wrap a closure back as a term. By
12//! extension, several structures containing term can be closurized as well, wich means to
13//! closurize all the inner terms.
14
15use crate::{
16 eval::{
17 Closure, Environment,
18 cache::Cache,
19 value::{Array, ArrayData, NickelValue, ValueContentRef},
20 },
21 term::{
22 BindingType, RuntimeContract, Term,
23 record::{Field, FieldDeps, RecordData, RecordDeps},
24 },
25};
26
27/// Structures which can be packed together with their environment as a closure.
28///
29/// The typical implementer is [NickelValue], but structures containing terms can also be
30/// closurizable, such as the contract case in a [crate::typ::Type], an array of terms, etc.
31///
32/// In those cases, the inner terms are closurized.
33pub trait Closurize: Sized {
34 /// Pack a closurizable together with its environment `env` as a closure.
35 ///
36 /// By default, this is just `self.closurize_as_btype(cache, env, BindingType::default())`.
37 fn closurize<C: Cache>(self, cache: &mut C, env: Environment) -> Self {
38 self.closurize_as_btype(cache, env, BindingType::default())
39 }
40
41 /// Pack a closurizable together with its environment as a closure and with the dependencies
42 /// set according to the given binding type.
43 /// An exception is made for constant terms, which should be left untouched (it's useless to to
44 /// allocate a closure for them)
45 fn closurize_as_btype<C: Cache>(
46 self,
47 cache: &mut C,
48 env: Environment,
49 btype: BindingType,
50 ) -> Self;
51}
52
53impl Closurize for NickelValue {
54 fn closurize_as_btype<C: Cache>(
55 self,
56 cache: &mut C,
57 env: Environment,
58 btype: BindingType,
59 ) -> NickelValue {
60 // There is no case where closurizing a constant term makes sense, because it's already
61 // evaluated, it doesn't have any free variables and doesn't contain any unevaluated terms.
62 // Even the merge of recursive records is able to handle non-closurized constant terms, so
63 // we just return the original term.
64 if self.is_constant() {
65 return self;
66 }
67
68 // If the term is already a closure, we don't have to allocate a useless intermediate
69 // closure. We just transfer the original cache index to the new environment. This is not
70 // only an optimization: this is relied upon by recursive record merging when computing the
71 // fixpoint.
72 //
73 // More specifically, the evaluation of a recursive record patches the environment of each
74 // field with the indices recursively referring to the other fields of the record. `eval`
75 // assumes that a recursive record field is either a constant or a `Term::Closure` whose
76 // cache elements *immediately* contain the original unevaluated expression (both
77 // properties are true after the first evaluation of a value through
78 // `Term::Closurize(value)` and maintained when reverting elements before merging recursive
79 // records).
80 //
81 // To maintain this invariant, `closurize` must NOT introduce an indirection through a
82 // additional closure, such as transforming:
83 //
84 // ```
85 // {foo = <closure@1>, bar = 1} | 1 <- 1 + bar
86 // ```
87 //
88 // to:
89 //
90 // ```
91 // {foo = <closure@2>, bar = 1} | 2 <- (<closure@1> | 1 <- 1 + bar)
92 // ```
93 //
94 // In this case, the evaluation of the recursive records will patch the outer environment
95 // instead of the inner one, giving:
96 //
97 // ```
98 // {foo = <closure:2>, bar = 1} | 2 <- (<closure:1> | 1 <- 1 + bar), bar <- 1
99 // ```
100 //
101 // Then, evaluating `foo` would unduly raise an unbound identifier error.
102 let idx = match self.content_ref() {
103 // If we just need a normal closure, and we find a normal closure inside the thunk, we
104 // reuse it
105 ValueContentRef::Thunk(thunk)
106 if thunk.deps().is_empty() && matches!(btype, BindingType::Normal) =>
107 {
108 self.try_into_thunk().unwrap()
109 }
110 ValueContentRef::Term(Term::Var(id)) if id.is_generated() => {
111 let id = *id;
112 // Albeit we should always find a generated variable in the environment,
113 // `env.get` is technically fallible, while this method is not. We thus wrap a
114 // potential error in a new closure which will propagate the unbound
115 // identifier error upon evaluation.
116 env.get(&id.ident()).cloned().unwrap_or_else(move || {
117 debug_assert!(false, "missing generated variable {id} in environment");
118 cache.add(Closure { value: self, env }, btype)
119 })
120 }
121 content => {
122 // It's suspicious to wrap a closure with existing dependencies in a new closure,
123 // although I'm not sure it would actually break anything. We panic in debug mode
124 // to catch this case.
125 debug_assert!(
126 !matches!((content, &btype), (ValueContentRef::Thunk(thunk), BindingType::Revertible(_)) if !thunk.deps().is_empty()),
127 "wrapping a closure with non-empty deps in a new closure with different deps"
128 );
129
130 cache.add(Closure { value: self, env }, btype)
131 }
132 };
133
134 idx.into()
135 }
136}
137
138impl Closurize for RuntimeContract {
139 fn closurize_as_btype<C: Cache>(
140 self,
141 cache: &mut C,
142 env: Environment,
143 btype: BindingType,
144 ) -> RuntimeContract {
145 self.map_contract(|ctr| ctr.closurize_as_btype(cache, env, btype.clone()))
146 }
147}
148
149impl Closurize for Vec<RuntimeContract> {
150 fn closurize_as_btype<C: Cache>(
151 self,
152 cache: &mut C,
153 env: Environment,
154 btype: BindingType,
155 ) -> Vec<RuntimeContract> {
156 self.into_iter()
157 .map(|pending_contract| {
158 pending_contract.closurize_as_btype(cache, env.clone(), btype.clone())
159 })
160 .collect()
161 }
162}
163
164impl Closurize for Field {
165 fn closurize_as_btype<C: Cache>(
166 self,
167 cache: &mut C,
168 env: Environment,
169 btype: BindingType,
170 ) -> Field {
171 let pending_contracts =
172 self.pending_contracts
173 .closurize_as_btype(cache, env.clone(), btype.clone());
174 let value = self
175 .value
176 .map(|value| value.closurize_as_btype(cache, env, btype));
177
178 Field {
179 metadata: self.metadata,
180 value,
181 pending_contracts,
182 }
183 }
184}
185
186impl Closurize for Array {
187 fn closurize_as_btype<C: Cache>(
188 self,
189 cache: &mut C,
190 env: Environment,
191 btype: BindingType,
192 ) -> Self {
193 self.into_iter()
194 .map(|val| {
195 if should_share(&val) {
196 val.closurize_as_btype(cache, env.clone(), btype.clone())
197 } else {
198 val
199 }
200 })
201 .collect()
202 }
203}
204
205// Closurize an array together with its pending contracts
206impl Closurize for ArrayData {
207 fn closurize_as_btype<C: Cache>(
208 self,
209 cache: &mut C,
210 env: Environment,
211 btype: BindingType,
212 ) -> Self {
213 let pending_contracts =
214 self.pending_contracts
215 .closurize_as_btype(cache, env.clone(), btype.clone());
216
217 ArrayData {
218 array: self
219 .array
220 .closurize_as_btype(cache, env.clone(), btype.clone()),
221 pending_contracts,
222 }
223 }
224}
225
226impl Closurize for RecordData {
227 fn closurize_as_btype<C: Cache>(
228 self,
229 cache: &mut C,
230 env: Environment,
231 btype: BindingType,
232 ) -> Self {
233 // We don't closurize the sealed tail, if any, because the underlying term is a private
234 // field and is supposed to be already closurized.
235 // TODO: should we change the type of SealedTail.term to CacheIndex to reflect that?
236 RecordData {
237 fields: self
238 .fields
239 .into_iter()
240 .map(|(id, field)| {
241 (
242 id,
243 field.closurize_as_btype(cache, env.clone(), btype.clone()),
244 )
245 })
246 .collect(),
247 ..self
248 }
249 }
250}
251
252/// Decides is an expression is worth being wrapped in a thunk. This is almost the same as asking
253/// if the expression is a weak head normal form, in which case it's already evaluated and doesn't
254/// need to be cached, but with some subtle differences.
255pub fn should_share(value: &NickelValue) -> bool {
256 match value.content_ref() {
257 ValueContentRef::Term(Term::Var(_) | Term::Fun(_)) => false,
258 ValueContentRef::Term(_)
259 // In general types are WHNF that shouldn't be shared, but currently the have an additional
260 // field storing their conversion to a contract. In order to take advantage of this local
261 // cache, we need to share them.
262 | ValueContentRef::Type(_) => true,
263 ValueContentRef::Null
264 | ValueContentRef::Bool(_)
265 // Now that arrays and records have proper constructors and are assumed to be closurized,
266 // they are effectively WHNFs (they used to represent both arrays and records, and the
267 // equivalent of today's `Term::Closurize(array_or_record)`, the latter not being a WHNF)
268 | ValueContentRef::Array(_)
269 | ValueContentRef::Record(_)
270 | ValueContentRef::Number(_)
271 | ValueContentRef::String(_)
272 | ValueContentRef::Label(_)
273 | ValueContentRef::SealingKey(_)
274 | ValueContentRef::ForeignId(_)
275 | ValueContentRef::Thunk(_)
276 // a custom contract is a function, and is thus a WHNF
277 | ValueContentRef::CustomContract(_) => false,
278 ValueContentRef::EnumVariant(enum_variant) => enum_variant.arg.is_some(),
279 }
280}
281
282/// Closurize all the inner terms of a recursive record, including pending contracts and
283/// dynamically defined fields.
284pub fn closurize_rec_record<C: Cache>(
285 cache: &mut C,
286 data: RecordData,
287 dyn_fields: Vec<(NickelValue, Field)>,
288 deps: Option<RecordDeps>,
289 env: Environment,
290) -> (RecordData, Vec<(NickelValue, Field)>) {
291 let fields = data
292 .fields
293 .into_iter()
294 .map(|(id, field)| {
295 let field_deps = deps
296 .as_ref()
297 .and_then(|deps| deps.stat_fields.get(&id.ident()))
298 .cloned();
299
300 (
301 id,
302 field.closurize_as_btype(cache, env.clone(), mk_binding_type(field_deps.clone())),
303 )
304 })
305 .collect();
306
307 let dyn_fields = dyn_fields
308 .into_iter()
309 .enumerate()
310 .map(|(index, (id_t, field))| {
311 let field_deps = deps
312 .as_ref()
313 .and_then(|deps| deps.dyn_fields.get(index))
314 .cloned();
315 // Identifier expressions aren't currently allowed to recursively depend on another
316 // field, so we closurize them as normal bindings.
317 (
318 id_t.closurize(cache, env.clone()),
319 field.closurize_as_btype(cache, env.clone(), mk_binding_type(field_deps.clone())),
320 )
321 })
322 .collect();
323
324 let data = RecordData { fields, ..data };
325
326 (data, dyn_fields)
327}
328
329fn mk_binding_type(field_deps: Option<FieldDeps>) -> BindingType {
330 // If the fields has an empty set of dependencies, we can eschew the useless introduction of a
331 // revertible element. Note that `field_deps` being `None` doesn't mean "empty dependencies"
332 // but rather that the dependencies haven't been computed. In the latter case, we must be
333 // conservative and assume the field can depend on any other field.
334 let is_non_rec = field_deps
335 .as_ref()
336 .map(FieldDeps::is_empty)
337 .unwrap_or(false);
338 if is_non_rec {
339 BindingType::Normal
340 } else {
341 BindingType::Revertible(field_deps.unwrap_or(FieldDeps::Unknown))
342 }
343}