nickel_lang_parser/uniterm.rs
1//! Additional AST nodes for the common UniTerm syntax (see RFC002 for more details).
2use super::{error::InvalidRecordTypeError, *};
3use error::ParseError;
4use indexmap::{IndexMap, map::Entry};
5
6use crate::{
7 ast::{
8 self,
9 record::{FieldDef, FieldMetadata, FieldPathElem, Include},
10 typ::{EnumRow, EnumRows, RecordRow, RecordRows, Type},
11 *,
12 },
13 environment::Environment,
14 identifier::Ident,
15 position::{RawSpan, TermPos},
16 typ::{DictTypeFlavour, EnumRowsF, RecordRowsF, TypeF, VarKind},
17};
18
19use std::{
20 cell::RefCell,
21 collections::{HashMap, HashSet},
22};
23
24/// A node of the uniterm AST. We only define new variants for those constructs that are common to
25/// types and terms. Otherwise, we piggyback on the existing ASTs to avoid duplicating methods and
26/// type definitions.
27///
28/// During parsing, some constructs are common to both terms and types, such as variables or record
29/// literals ([`UniRecord`]s). They may be the source of ambiguities in the grammar, if we add two
30/// different derivation for them. This happens inside rules that accepts both a term and and a
31/// type, say the operands of an arrow `lhs -> rhs`. Take for example `lhs` to be a variable `a`.
32/// It's hard not to end up having two possible derivation for `a`, one going first through term
33/// rules and then types rules (a custom contract `#a` in the old syntax), and vice-versa (a type
34/// variable `a` in the old syntax).
35///
36/// To avoid the issue, we parse the source as a `UniTermNode`s, and give only one possible
37/// derivation - we will continue with the variable example in the following - through the parsing
38/// rules. As long as we can't decide yet how to see this variable, we keep it as a
39/// `UniTermNode::Var`.
40///
41/// As soon as this variable is used in a compound expression, the top-level rule tells us how to
42/// translate it. For example, if we see an arrow `a -> Num`, then we will convert it to a type
43/// variable, and return `UniTermNode::Type(TypeF::Arrow(..))` (there is actually a subtlety: see
44/// the in-code documentation of the private symbol `FixTypeVars::fix_type_vars`, but let's ignore
45/// it here). If, on the other hand, we enter the rule for an infix operator as in `a + 1`, `a` will
46/// be converted to a `Term::Var` and the resulting uniterm will be
47/// `UniTermNode::Term(Term::Op2(..))`.
48pub enum UniTermNode<'ast> {
49 /// A variable. Can refer both to a term variable or a type variable.
50 Var(LocIdent),
51 /// A record. Can refer both to a record literal or a record type.
52 Record(UniRecord<'ast>),
53 /// A uniterm that has been determined to be a term.
54 Term(Ast<'ast>),
55 /// A uniterm that has been determined to be a type.
56 Type(Type<'ast>),
57}
58
59/// A uniterm with positional information.
60pub struct UniTerm<'ast> {
61 node: UniTermNode<'ast>,
62 pos: TermPos,
63}
64
65impl<'ast> From<UniTermNode<'ast>> for UniTerm<'ast> {
66 fn from(node: UniTermNode<'ast>) -> Self {
67 UniTerm {
68 node,
69 pos: TermPos::None,
70 }
71 }
72}
73
74impl UniTerm<'_> {
75 pub fn with_pos(mut self, pos: TermPos) -> Self {
76 self.pos = pos;
77 self
78 }
79}
80
81// For nodes such as `Type` or `Record`, the following implementation has to choose between two
82// positions to use: the one of the wrapping `UniTerm`, and the one stored inside the `Ast` or
83// the `Type`. This implementation assumes that the latest set is the one of `UniTerm`, which is
84// the single source of truth. In fact, it happens that only the outermost uniterm position is set
85// while the innermost is still `TermPos::None`.
86impl<'ast> TryConvert<'ast, UniTerm<'ast>> for Type<'ast> {
87 type Error = ParseError;
88
89 fn try_convert(alloc: &'ast AstAlloc, ut: UniTerm<'ast>) -> Result<Self, ParseError> {
90 let pos = ut.pos;
91
92 let typ = match ut.node {
93 UniTermNode::Var(id) => TypeF::Var(id.ident()),
94 UniTermNode::Record(r) => Type::try_convert(alloc, r)?.typ,
95 UniTermNode::Type(ty) => ty.typ,
96 UniTermNode::Term(ast) => {
97 if matches!(
98 ast.node,
99 Node::Null
100 | Node::Bool(_)
101 | Node::Number(_)
102 | Node::String(_)
103 | Node::Array(_)
104 | Node::EnumVariant { .. }
105 | Node::StringChunks(_)
106 ) {
107 //unwrap(): uniterms are supposed to come from the parser, and thus have a
108 //well-defined position
109 return Err(ParseError::InvalidContract(ut.pos.unwrap()));
110 }
111
112 TypeF::Contract(alloc.alloc(Ast {
113 node: ast.node,
114 pos,
115 }))
116 }
117 };
118
119 Ok(Type { typ, pos })
120 }
121}
122
123impl<'ast> TryConvert<'ast, UniTerm<'ast>> for Ast<'ast> {
124 type Error = ParseError;
125
126 fn try_convert(alloc: &'ast AstAlloc, ut: UniTerm<'ast>) -> Result<Self, ParseError> {
127 let UniTerm { node, pos } = ut;
128
129 let node = match node {
130 UniTermNode::Var(id) => Node::Var(id),
131 UniTermNode::Record(r) => Ast::try_convert(alloc, r)?.node,
132 UniTermNode::Type(typ) => {
133 let typ = typ.fix_type_vars(alloc, pos.unwrap())?;
134
135 if let TypeF::Contract(ctr) = typ.typ {
136 ctr.node.clone()
137 } else {
138 alloc.typ(typ)
139 }
140 }
141 UniTermNode::Term(ast) => ast.node,
142 };
143
144 Ok(Ast { node, pos })
145 }
146}
147
148impl<'ast> From<Ast<'ast>> for UniTerm<'ast> {
149 fn from(ast: Ast<'ast>) -> Self {
150 let pos = ast.pos;
151
152 UniTerm {
153 node: UniTermNode::Term(ast),
154 pos,
155 }
156 }
157}
158
159impl<'ast> From<Node<'ast>> for UniTerm<'ast> {
160 fn from(node: Node<'ast>) -> Self {
161 UniTerm {
162 node: UniTermNode::Term(node.into()),
163 pos: TermPos::None,
164 }
165 }
166}
167
168impl<'ast> From<Type<'ast>> for UniTerm<'ast> {
169 fn from(ty: Type<'ast>) -> Self {
170 let pos = ty.pos;
171 UniTerm {
172 node: UniTermNode::Type(ty),
173 pos,
174 }
175 }
176}
177
178impl<'ast> From<UniRecord<'ast>> for UniTerm<'ast> {
179 fn from(ur: UniRecord<'ast>) -> Self {
180 let pos = ur.pos;
181
182 UniTerm {
183 node: UniTermNode::Record(ur),
184 pos,
185 }
186 }
187}
188
189impl<T, U> TryConvert<'_, T> for U
190where
191 U: TryFrom<T>,
192{
193 type Error = U::Error;
194
195 fn try_convert(_: &AstAlloc, from: T) -> Result<Self, Self::Error> {
196 U::try_from(from)
197 }
198}
199
200/// A record in the `UniTerm` syntax.
201#[derive(Clone)]
202pub struct UniRecord<'ast> {
203 pub fields: Vec<FieldDef<'ast>>,
204 pub tail: Option<(RecordRows<'ast>, TermPos)>,
205 pub includes: Vec<Include<'ast>>,
206 pub open: bool,
207 pub pos: TermPos,
208 /// The position of the final ellipsis `..`, if any. Used for error reporting. `pos_ellipsis`
209 /// must be different from `TermPos::None` if and only if `attrs.open` is `true`.
210 pub pos_ellipsis: TermPos,
211}
212
213impl<'ast> UniRecord<'ast> {
214 /// Check if a field definition has a type annotation but no definition. This is currently
215 /// forbidden for record literals that aren't record types. In that case, raise the
216 /// corresponding parse error.
217 pub fn check_typed_field_without_def(&self) -> Result<(), ParseError> {
218 enum FieldState {
219 // A field with a type annotation but without a definition was encountered. Still, we
220 // might find a definition later, because of piecewise definitions
221 Candidate((RawSpan, RawSpan)),
222 // Marker to indicate that a field has been defined before, and can no longer raise an
223 // error.
224 Defined,
225 }
226
227 // We have to be a bit careful because of piecewise definitions. That is, we still want to
228 // accept record literals such as:
229 //
230 // ```
231 // {
232 // map : forall a b. (a -> b) -> Array a -> Array b,
233 // map = fun f array => ...
234 // }
235 // ```
236 //
237 // On the other hand, it's a bit too complex to handle the case of piecewise definitions:
238 //
239 // ```
240 // {
241 // foo.bar.baz : Num,
242 // foo.bar.baz = 1,
243 // }
244 // ```
245 //
246 // This is arguably much less common and useful. In this case, we are more restrictive and
247 // reject such an example, although it would theoretically be acceptable as it's elaborated
248 // as a record literal that is accepted:
249 //
250 // ```
251 // { foo = { bar = {baz : Num = 1 } } }
252 // ```
253 // We're using an index map because this map impacts the determinism of error reporting.
254 let mut candidate_fields = IndexMap::new();
255
256 let first_without_def = self.fields.iter().find_map(|field_def| {
257 let path_as_ident = field_def.path_as_ident();
258
259 match &field_def {
260 FieldDef {
261 path: _,
262 value: None,
263 metadata:
264 FieldMetadata {
265 annotation: Annotation { typ: Some(typ), .. },
266 ..
267 },
268 ..
269 } => {
270 // If the path is a single identifier, we don't error out right away, because
271 // we might already have found a definition for this field, or might do later
272 // in the loop.
273 if let Some(ident) = path_as_ident {
274 match candidate_fields.entry(ident.ident()) {
275 // If the hashmap is occupied, we've met this field before. Either
276 // there is another definition without annotation, in which case
277 // there's no need to replace it, or there is a `Defined` element,
278 // which means this is false positive that we can ignore. In both cases,
279 // we don't have anytning more to do
280 Entry::Occupied(_) => None,
281 Entry::Vacant(vacant_entry) => {
282 vacant_entry.insert(FieldState::Candidate((
283 ident.pos.unwrap(),
284 typ.pos.unwrap(),
285 )));
286 None
287 }
288 }
289 }
290 // We don't do anything smart for composite paths: we raise an error right way
291 else {
292 Some((field_def.pos.unwrap(), typ.pos.unwrap()))
293 }
294 }
295 field_def => {
296 if let (Some(ident), Some(_)) = (path_as_ident, &field_def.value) {
297 candidate_fields.insert(ident.ident(), FieldState::Defined);
298 }
299
300 None
301 }
302 }
303 });
304
305 let first_without_def =
306 first_without_def.or(candidate_fields.into_iter().find_map(|(_, field_state)| {
307 if let FieldState::Candidate(spans) = field_state {
308 Some(spans)
309 } else {
310 None
311 }
312 }));
313
314 if let Some((ident_span, annot_span)) = first_without_def {
315 Err(ParseError::TypedFieldWithoutDefinition {
316 field_span: ident_span,
317 annot_span,
318 })
319 } else {
320 Ok(())
321 }
322 }
323
324 /// Checks if this record qualifies as a record type. If this function
325 /// returns `true`, then [Self::into_type_strict] must succeed.
326 pub fn is_record_type(&self) -> bool {
327 self.fields.iter().all(|field_def| {
328 // Field paths with a depth > 1 are not supported in record types.
329 field_def.path.len() == 1
330 // Warning: this pattern must stay in sync with the
331 // corresponding pattern in `into_type_strict`.
332 && matches!(&field_def,
333 FieldDef {
334 value: None,
335 metadata:
336 FieldMetadata {
337 doc: None,
338 annotation:
339 Annotation {
340 typ: Some(_),
341 contracts,
342 },
343 opt: false,
344 not_exported: false,
345 priority: MergePriority::Neutral,
346 },
347 ..
348 } if contracts.is_empty())
349 }) && self.includes.is_empty()
350 }
351
352 /// Turns this record into a plain record type, uniquely containing fields of the form `fields:
353 /// Type`. Currently, this doesn't support the field path syntax: `{foo.bar.baz :
354 /// Type}.into_type_strict()` returns an `Err`.
355 pub fn into_type_strict(
356 self,
357 alloc: &'ast AstAlloc,
358 ) -> Result<Type<'ast>, InvalidRecordTypeError> {
359 fn term_to_record_rows<'ast>(
360 alloc: &'ast AstAlloc,
361 id: LocIdent,
362 field_def: FieldDef<'ast>,
363 tail: RecordRows<'ast>,
364 ) -> Result<RecordRows<'ast>, InvalidRecordTypeError> {
365 match field_def {
366 // Warning: this pattern must stay in sync with the corresponding pattern in
367 // `is_record_type`.
368 FieldDef {
369 path: _,
370 value: None,
371 metadata:
372 FieldMetadata {
373 doc: None,
374 annotation:
375 Annotation {
376 typ: Some(typ),
377 contracts: [],
378 },
379 opt: false,
380 not_exported: false,
381 priority: MergePriority::Neutral,
382 },
383 pos: _,
384 } => Ok(RecordRows(RecordRowsF::Extend {
385 row: RecordRow {
386 id,
387 typ: alloc.type_data(typ.typ, typ.pos),
388 },
389 tail: alloc.record_rows(tail.0),
390 })),
391 _ => {
392 Err(InvalidRecordTypeError::InvalidField(
393 // Position of identifiers must always be set at this stage (parsing)
394 id.pos.fuse(field_def.pos).unwrap(),
395 ))
396 }
397 }
398 }
399
400 // An open record (with an ellipsis `..` at the end) can't be translated to a record type.
401 // `pos_ellipsis` should be set iff `attrs.open` is true.
402 debug_assert!((self.pos_ellipsis == TermPos::None) != self.open);
403
404 if let Some(raw_span) = self.pos_ellipsis.into_opt() {
405 return Err(InvalidRecordTypeError::IsOpen(raw_span));
406 }
407
408 if let Some(raw_span) = self.includes.first().map(|incl| incl.ident.pos.unwrap()) {
409 return Err(InvalidRecordTypeError::InterpolatedField(raw_span));
410 }
411
412 // Track the field names we've seen, to check for duplicates.
413 let mut fields_seen = HashMap::new();
414
415 let rrows = self
416 .fields
417 .into_iter()
418 // Because we build row types as a linked list by folding on the original iterator, the
419 // order of identifiers is reversed. This not a big deal but it's less confusing to the
420 // user to print them in the original order for error reporting.
421 .rev()
422 .try_fold(
423 self.tail
424 .map(|(tail, _)| tail)
425 .unwrap_or(RecordRows(RecordRowsF::Empty)),
426 |acc: RecordRows, field_def| {
427 // We don't support compound paths for types, yet.
428 // All positions can be unwrapped because we're still parsing.
429 if field_def.path.len() > 1 {
430 let span = field_def
431 .path
432 .iter()
433 .map(|path_elem| path_elem.pos().unwrap())
434 .reduce(|acc, span| acc.fuse(span).unwrap_or(acc))
435 // We already checked that the path is non-empty.
436 .unwrap();
437
438 Err(InvalidRecordTypeError::InvalidField(span))
439 } else {
440 let elem = field_def.path.last().unwrap();
441
442 let id = match elem {
443 FieldPathElem::Ident(id) => *id,
444 FieldPathElem::Expr(_) => {
445 return Err(InvalidRecordTypeError::InterpolatedField(
446 field_def.pos.unwrap(),
447 ));
448 }
449 };
450 if let Some(prev_id) = fields_seen.insert(id.ident(), id) {
451 return Err(InvalidRecordTypeError::RepeatedField {
452 // Because we're iterating backwards, `id` came first.
453 orig: id.pos.unwrap(),
454 dup: prev_id.pos.unwrap(),
455 });
456 }
457
458 term_to_record_rows(alloc, id, field_def, acc)
459 }
460 },
461 )?;
462 Ok(Type {
463 typ: TypeF::Record(rrows),
464 pos: self.pos,
465 })
466 }
467
468 pub fn with_pos(mut self, pos: TermPos) -> Self {
469 self.pos = pos;
470 self
471 }
472}
473
474impl<'ast> TryConvert<'ast, UniRecord<'ast>> for Ast<'ast> {
475 type Error = ParseError;
476
477 /// Convert a `UniRecord` to a term. If the `UniRecord` is syntactically a record type or it
478 /// has a tail, it is first interpreted as a type and then wrapped in a `Term::Type`. One
479 /// exception is the empty record, which behaves the same both as a type and a contract, and
480 /// turning an empty record literal to an opaque contract would break everything, so the empty
481 /// record is always interpreted as a term directly.
482 ///
483 /// If the unirecord isn't a record type and doesn't have a tail, it is interpreted as an
484 /// equivalent record term. Fail if the `UniRecord` has a tail but isn't syntactically a record
485 /// type either.
486 ///
487 /// We also fix the type variables of the type appearing inside annotations (see in-code
488 /// documentation of the private symbol `FixTypeVars::fix_type_vars`).
489 fn try_convert(alloc: &'ast AstAlloc, ur: UniRecord<'ast>) -> Result<Self, ParseError> {
490 let pos = ur.pos;
491
492 // First try to interpret this record as a type.
493 if ur.tail.is_some() || (ur.is_record_type() && !ur.fields.is_empty()) {
494 let tail_span = ur.tail.as_ref().and_then(|t| t.1.into_opt());
495 // We unwrap all positions: at this stage of the parsing, they must all be set
496 let typ =
497 ur.into_type_strict(alloc)
498 .map_err(|cause| ParseError::InvalidRecordType {
499 tail_span,
500 record_span: pos.unwrap(),
501 cause,
502 })?;
503
504 let typ = typ.fix_type_vars(alloc, pos.unwrap())?;
505
506 Ok(alloc.typ(typ).spanned(pos))
507 } else {
508 ur.check_typed_field_without_def()?;
509
510 let UniRecord { fields, open, .. } = ur;
511
512 let field_defs_fixed = fields
513 .into_iter()
514 .map(|field_def| {
515 Ok(FieldDef {
516 metadata: fix_field_types(
517 alloc,
518 field_def.metadata,
519 field_def.pos.unwrap(),
520 )?,
521 ..field_def
522 })
523 })
524 .collect::<Result<Vec<_>, ParseError>>()?;
525
526 Ok(alloc
527 .record(ast::record::Record {
528 field_defs: alloc.alloc_many(field_defs_fixed),
529 // Note that we don't need to fix field types for includes: the parser does it
530 // already, because when it sees an include expression, it knows the current
531 // literal must be a record value and not a record type.
532 includes: alloc.alloc_many(ur.includes),
533 open,
534 })
535 .spanned(pos))
536 }
537 }
538}
539
540/// Try to convert a `UniRecord` to a type. The strict part means that the `UniRecord` must be
541impl<'ast> TryConvert<'ast, UniRecord<'ast>> for Type<'ast> {
542 type Error = ParseError;
543
544 /// Convert a `UniRecord` to a type. If the `UniRecord` has a tail, it is interpreted strictly
545 /// as a type and fail if it isn't a plain record type. Otherwise, we first try to interpret it
546 /// as a plain record type, and if that doesn't work, we interpret it as a term and wrap it
547 /// back as a user-defined contract.
548 fn try_convert(alloc: &'ast AstAlloc, ur: UniRecord<'ast>) -> Result<Self, ParseError> {
549 let pos = ur.pos;
550
551 if let Some((_, tail_pos)) = ur.tail {
552 ur.into_type_strict(alloc)
553 .map_err(|cause| ParseError::InvalidRecordType {
554 tail_span: tail_pos.into_opt(),
555 record_span: pos.unwrap(),
556 cause,
557 })
558 } else {
559 let pos = ur.pos;
560 ur.clone().into_type_strict(alloc).or_else(|_| {
561 Ast::try_convert(alloc, ur).map(|ast| Type {
562 typ: TypeF::Contract(alloc.alloc(ast)),
563 pos,
564 })
565 })
566 }
567 }
568}
569
570/// Cell providing shared mutable access to a var_kind. This is used to decide the kind of a
571/// variable associated to a forall in the `fix_type_vars` phase.
572///
573/// This cell provides interior mutability for [`VarKind`]. It makes it possible to mutate the
574/// inner data when put in an environment, which only provides immutable references to its values.
575#[derive(PartialEq, Eq)]
576pub(super) struct VarKindCell(RefCell<Option<VarKind>>);
577
578/// Error raised by [`VarKindCell`] when trying to set a variable kind which is different from the
579/// one already set.
580#[derive(Copy, Clone, Eq, PartialEq, Debug)]
581pub(super) struct VarKindMismatch;
582
583/// Environment maintained during the `fix_type_vars` phase. Used both to determine if a variable
584/// is bound by an enclosing forall (if `env.get(var_id).is_some()`), and to provide a shared
585/// mutable variable kind that can be modified depending on the location of type variable
586/// occurrences.
587pub(super) type BoundVarEnv = Environment<Ident, VarKindCell>;
588
589impl VarKindCell {
590 /// Create a new unset `VarKindCell` at resolution time, this will default to `VarKind::Type`,
591 pub(super) fn new() -> Self {
592 VarKindCell(RefCell::new(None))
593 }
594
595 /// Everywhere a forall variable is used it must be of the same type. If this is the first time
596 /// we encounter the variable, we can set it freely. If it has been set, and is of the same
597 /// type, we only need to combine `excluded` record row variables. If it has been set to a
598 /// different `VarKind`, we return `Err(_)`.
599 pub(super) fn try_set(&self, var_kind: VarKind) -> Result<(), VarKindMismatch> {
600 match (&mut *self.0.borrow_mut(), var_kind) {
601 (s @ None, var_kind) => {
602 *s = Some(var_kind);
603 Ok(())
604 }
605 (Some(data), var_kind) if data == &var_kind => Ok(()),
606 (
607 Some(VarKind::RecordRows { excluded: ex1 }),
608 VarKind::RecordRows { excluded: ex2 },
609 ) => {
610 ex1.extend(ex2);
611 Ok(())
612 }
613 (Some(VarKind::EnumRows { excluded: ex1 }), VarKind::EnumRows { excluded: ex2 }) => {
614 ex1.extend(ex2);
615 Ok(())
616 }
617 _ => Err(VarKindMismatch),
618 }
619 }
620
621 /// Return a clone of the current var_kind.
622 #[allow(dead_code)]
623 pub fn var_kind(&self) -> Option<VarKind> {
624 self.0.borrow().clone()
625 }
626
627 /// Return the inner var_kind, leaving `Nothing` behind in the `VarKindCell`.
628 pub fn take_var_kind(&self) -> Option<VarKind> {
629 self.0.borrow_mut().take()
630 }
631}
632
633pub(super) trait FixTypeVars<'ast>
634where
635 Self: Sized,
636{
637 /// Post-process a type at the right hand side of an annotation by replacing each unbound type
638 /// variable `TypeF::Var(id)` by a term variable with the same identifier seen as a custom
639 /// contract `TypeF::Contract(Node::Var(id))`.
640 ///
641 /// Additionally, this passes determine the kind of a variable introduced by a forall binder.
642 ///
643 /// # Type variable fixing
644 ///
645 /// Since parsing is done bottom-up, and given the specification of the uniterm syntax for
646 /// variables occurring in types, we often can't know right away if such a variable occurrence
647 /// will eventually be a type variable or a term variable seen as a custom contract.
648 ///
649 /// Take for example `a -> b`. At this stage, `a` and `b` could be both variables referring to
650 /// a contract (e.g. in `x | a -> b`) or type variables (e.g. in `x | forall a b. a -> b`),
651 /// depending on enclosing `forall`s. To handle both cases, we initially parse all variables
652 /// inside types as type variables. When reaching the right-hand side of an annotation, because
653 /// `forall`s can only bind locally in a type, we can then decide the actual nature of each
654 /// occurrence. We thus recurse into the newly constructed type to change those type variables
655 /// that are not actually bound by a `forall` to be term variables. This is the role of
656 /// `fix_type_vars()`.
657 ///
658 /// Since `forall`s only bind type variables locally and don't cross contract boundaries, we
659 /// don't have to recurse into contracts and this pass will only visit each node of the AST at
660 /// most once in total (and most probably much less so). In some sense, we just visit the type
661 /// layer, or type spine, composed only of type constructors.
662 ///
663 /// There is one subtlety with unirecords, though. A unirecord can still be in interpreted as a
664 /// record type later. Take the following example:
665 ///
666 /// ```nickel
667 /// let mk_pair : forall a b. a -> b -> {fst: a, snd: b} = <exp>
668 /// ```
669 ///
670 /// Since this unirecord will eventually be interpreted as a record type, we can't know yet when
671 /// parsing `fst: a` if `a` will be a type variable or a term variable (while, for all other
672 /// constructs, an annotation is a boundary that `forall` binders can't cross). In this example,
673 /// there is indeed an enclosing forall binding `a`. With unirecords, before fixing type
674 /// variables, we have to wait until we eventually convert the unirecord to a term (in which
675 /// case we fix all the top-level annotations) or a type (in which case we do nothing: the
676 /// enclosing type will trigger the fix once it's fully constructed). Fixing a unirecord prior
677 /// to a conversion to a term is done by [`fix_field_types`].
678 ///
679 /// # Variable kind
680 ///
681 /// Type variables can have different kind (cf [crate::typ::VarKind]). The kind determines if
682 /// the variable is meant to be substituted for a type, record rows, or enum rows.
683 ///
684 /// During parsing, the kind is determined syntactically, depending on where the corresponding
685 /// bound occurrences occur:
686 ///
687 /// ```nickel
688 /// # occurs in type position
689 /// forall a. Num -> {_: Bar -> a}
690 /// # occurs in record rows position
691 /// forall a. Num -> {foo: Str ; a} -> {bar: Str ; a}
692 /// # occurs both in record rows and enum rows position
693 /// # this is inconsistent and will raise a parse error
694 /// forall a. [| 'foo, 'bar; a |] -> {foo : Str, bar: Str; a}
695 /// ```
696 fn fix_type_vars(self, alloc: &'ast AstAlloc, span: RawSpan) -> Result<Self, ParseError> {
697 Ok(self
698 .fix_type_vars_env(alloc, BoundVarEnv::new(), span)?
699 .unwrap_or(self))
700 }
701
702 /// Same as [Self::fix_type_vars], but takes `self` as a reference instead, and returns
703 /// `Ok(None)` when `self` hasn't been modified by the type fixing phase or
704 /// `Ok(Some(new_self))` with a modified, owned `self` upon change.
705 fn fix_type_vars_ref(
706 &self,
707 alloc: &'ast AstAlloc,
708 span: RawSpan,
709 ) -> Result<Option<Self>, ParseError> {
710 self.fix_type_vars_env(alloc, BoundVarEnv::new(), span)
711 }
712
713 /// Fix type vars in a given environment of variables bound by foralls enclosing this type. The
714 /// environment maps bound variables to a reference to the variable kind of the corresponding
715 /// forall.
716 ///
717 /// # Ownership
718 ///
719 /// [Self::fix_type_vars_env] might need to be called both on owned data and on immutably
720 /// borrowed data (e.g. [`Type`][crate::bytecode::ast::typ::Type] and [`&'ast
721 /// Type`][crate::bytecode::ast::typ::Type]). We don't want to duplicate the logic of
722 /// [Self::fix_type_vars_env] for both, as we can't write one that is generic enough while
723 /// properly avoiding useless allocations.
724 ///
725 /// The idea of the current API is that even when operating on owned data, `self` is taken by
726 /// reference. If `self` isn't modified by the fix type phase, then `None` is returned and the
727 /// caller can just reuse the original `self` how they please.
728 ///
729 /// If `self` has been modified by the fix type phase, then `Some(new_value)` is returned with
730 /// a new owned version of `self`. If the caller needed an owned version, the job is done.
731 /// Otherwise, the caller can use [the ast allocator `alloc`][crate::bytecode::ast::AstAlloc]
732 /// to move the owned data into the allocator and get an `&'ast` reference out of it. The only
733 /// cost is that for owned data, we could have reused the original `self` instead of returning
734 /// a new one, but this is a detail: in practice, only the top-level call of `fix_type_vars` is
735 /// performed on owned data, and the recursive calls are all performed on `&'ast` references.
736 /// At worse, we waste the top-level node, which is stack-allocated anyway.
737 ///
738 /// Because AST nodes are allocated in an arena and are immutable, they won't be reclaimed
739 /// until the whole AST is finally transformed to either the mainline AST or (in the future)
740 /// compiled to bytecode. We want to avoid building useless copies of existing nodes, which is
741 /// the reason behind not using a simpler strategy of just always returning a new value, that
742 /// might be identical to the old one if no type variable has been fixed.
743 fn fix_type_vars_env(
744 &self,
745 alloc: &'ast AstAlloc,
746 bound_vars: BoundVarEnv,
747 span: RawSpan,
748 ) -> Result<Option<Self>, ParseError>;
749}
750
751impl<'ast> FixTypeVars<'ast> for Type<'ast> {
752 fn fix_type_vars_env(
753 &self,
754 alloc: &'ast AstAlloc,
755 mut bound_vars: BoundVarEnv,
756 span: RawSpan,
757 ) -> Result<Option<Self>, ParseError> {
758 use crate::ast::typ::TypeUnr;
759
760 let pos = self.pos;
761
762 let build_fixed = |new_type: TypeUnr<'ast>| -> Self { Type { typ: new_type, pos } };
763
764 match self.typ {
765 TypeF::Dyn
766 | TypeF::Number
767 | TypeF::Bool
768 | TypeF::String
769 | TypeF::ForeignId
770 | TypeF::Symbol
771 | TypeF::Contract(_)
772 // We don't fix type variables inside a dictionary contract. A dictionary contract
773 // should not be considered as a static type, but instead work as a contract. In
774 // particular we forbid capturing type variables from the enclosing type: see
775 // https://github.com/tweag/nickel/issues/1228.
776 | TypeF::Dict { flavour: DictTypeFlavour::Contract, ..}
777 | TypeF::Wildcard(_) => Ok(None),
778 TypeF::Arrow(src, tgt) => {
779 let src_result = src.fix_type_vars_env(alloc, bound_vars.clone(), span)?;
780 let tgt_result = tgt.fix_type_vars_env(alloc, bound_vars, span)?;
781
782 if src_result.is_some() || tgt_result.is_some() {
783 let src = src_result.map(|ty| alloc.alloc(ty)).unwrap_or(src);
784 let tgt = tgt_result.map(|ty| alloc.alloc(ty)).unwrap_or(tgt);
785
786 Ok(Some(build_fixed(TypeF::Arrow(src, tgt))))
787 }
788 else {
789 Ok(None)
790 }
791 }
792 TypeF::Var(sym) => {
793 if let Some(cell) = bound_vars.get(&sym) {
794 cell.try_set(VarKind::Type)
795 .map_err(|_| ParseError::TypeVariableKindMismatch {
796 ty_var: LocIdent::from(sym).with_pos(self.pos),
797 span
798 })?;
799
800 Ok(None)
801 } else {
802 let id = LocIdent::from(sym).with_pos(self.pos);
803
804 Ok(Some(build_fixed(TypeF::Contract(alloc.alloc(Ast {
805 node: Node::Var(id),
806 pos: id.pos,
807 })))))
808 }
809 }
810 TypeF::Forall {
811 var,
812 var_kind: ref prev_var_kind,
813 body,
814 } => {
815 // We span a new `VarKindCell` and put it in the environment. The recursive calls
816 // to `fix_type_vars` will fill this cell with the correct kind, which we get
817 // afterwards to set the right value for `var_kind`.
818 bound_vars.insert(var.ident(), VarKindCell::new());
819 let body_fixed = body.fix_type_vars_env(alloc, bound_vars.clone(), span)?;
820
821 // unwrap(): we just inserted a value for `var` above, and environment can never
822 // delete values.
823 // take_var_kind(): once we leave the body of this forall, we no longer need
824 // access to this VarKindCell in bound_vars. We can avoid a clone by taking
825 // the var_kind out. We could also take the whole key value pair out of the
826 // `Environment`, but ownership there is trickier.
827 let var_kind = bound_vars
828 .get(&var.ident())
829 .unwrap()
830 .take_var_kind()
831 .unwrap_or_default();
832
833 // By default, the parser sets `var_kind` to `Type`. If the `var_kind` turns out to
834 // actually be `Type`, and the body hasn' changed, we can avoid any cloning and
835 // return `Ok(None)`. Otherwise, we have to build a new `TypeF::Forall`. We still
836 // want to defend against callers that wouldn't follow this convention (that
837 // `prev_var_kind` is necessarily `Type` before fixing), so we still check it.
838 if body_fixed.is_some() || !matches!((&var_kind, &prev_var_kind), (&VarKind::Type, &VarKind::Type)) {
839 let body = body_fixed.map(|body| alloc.alloc(body)).unwrap_or(body);
840
841 Ok(Some(build_fixed(TypeF::Forall {
842 var,
843 var_kind,
844 body,
845 })))
846 } else {
847 Ok(None)
848 }
849 }
850 TypeF::Dict {
851 type_fields,
852 flavour: flavour @ DictTypeFlavour::Type
853 } => {
854 Ok(type_fields.fix_type_vars_env(alloc, bound_vars, span)?.map(|ty| {
855 build_fixed(TypeF::Dict {
856 type_fields: alloc.alloc(ty),
857 flavour,
858 })
859 }))
860 }
861 TypeF::Array(ty) => {
862 Ok(ty.fix_type_vars_env(alloc, bound_vars, span)?.map(|ty|
863 build_fixed(TypeF::Array(alloc.alloc(ty)))))
864 }
865 TypeF::Enum(ref erows) => {
866 Ok(erows.fix_type_vars_env(alloc, bound_vars, span)?.map(|erows|
867 build_fixed(TypeF::Enum(erows))
868 ))
869 }
870 TypeF::Record(ref rrows) => {
871 Ok(rrows.fix_type_vars_env(alloc, bound_vars, span)?.map(|rrows|
872 build_fixed(TypeF::Record(rrows))
873 ))
874 }
875 }
876 }
877}
878
879impl<'ast> FixTypeVars<'ast> for RecordRows<'ast> {
880 fn fix_type_vars_env(
881 &self,
882 alloc: &'ast AstAlloc,
883 bound_vars: BoundVarEnv,
884 span: RawSpan,
885 ) -> Result<Option<Self>, ParseError> {
886 fn do_fix<'ast>(
887 rrows: &RecordRows<'ast>,
888 alloc: &'ast AstAlloc,
889 bound_vars: BoundVarEnv,
890 span: RawSpan,
891 mut maybe_excluded: HashSet<Ident>,
892 ) -> Result<Option<RecordRows<'ast>>, ParseError> {
893 match rrows.0 {
894 RecordRowsF::Empty | RecordRowsF::TailDyn => Ok(None),
895 // We can't have a contract in tail position, so we don't fix `TailVar`. However, we
896 // have to set the correct kind for the corresponding forall binder.
897 RecordRowsF::TailVar(id) => {
898 if let Some(cell) = bound_vars.get(&id.ident()) {
899 cell.try_set(VarKind::RecordRows {
900 excluded: maybe_excluded,
901 })
902 .map_err(|_| ParseError::TypeVariableKindMismatch { ty_var: id, span })?;
903 }
904
905 Ok(None)
906 }
907 RecordRowsF::Extend { ref row, tail } => {
908 maybe_excluded.insert(row.id.ident());
909
910 let row_fixed = row.fix_type_vars_env(alloc, bound_vars.clone(), span)?;
911 let tail_fixed = do_fix(tail, alloc, bound_vars, span, maybe_excluded)?;
912
913 if row_fixed.is_some() || tail_fixed.is_some() {
914 let row = row_fixed.unwrap_or_else(|| row.clone());
915 let tail = tail_fixed
916 .map(|tail_fixed| alloc.alloc(tail_fixed))
917 .unwrap_or(tail);
918
919 Ok(Some(RecordRows(RecordRowsF::Extend { row, tail })))
920 } else {
921 Ok(None)
922 }
923 }
924 }
925 }
926
927 do_fix(self, alloc, bound_vars, span, HashSet::new())
928 }
929}
930
931impl<'ast> FixTypeVars<'ast> for RecordRow<'ast> {
932 fn fix_type_vars_env(
933 &self,
934 alloc: &'ast AstAlloc,
935 bound_vars: BoundVarEnv,
936 span: RawSpan,
937 ) -> Result<Option<Self>, ParseError> {
938 Ok(self
939 .typ
940 .fix_type_vars_env(alloc, bound_vars, span)?
941 .map(|typ| RecordRow {
942 id: self.id,
943 typ: alloc.alloc(typ),
944 }))
945 }
946}
947
948impl<'ast> FixTypeVars<'ast> for EnumRows<'ast> {
949 fn fix_type_vars_env(
950 &self,
951 alloc: &'ast AstAlloc,
952 bound_vars: BoundVarEnv,
953 span: RawSpan,
954 ) -> Result<Option<Self>, ParseError> {
955 fn do_fix<'ast>(
956 erows: &EnumRows<'ast>,
957 alloc: &'ast AstAlloc,
958 bound_vars: BoundVarEnv,
959 span: RawSpan,
960 mut maybe_excluded: HashSet<Ident>,
961 ) -> Result<Option<EnumRows<'ast>>, ParseError> {
962 match erows.0 {
963 EnumRowsF::Empty => Ok(None),
964 // We can't have a contract in tail position, so we don't fix `TailVar` itself.
965 // However, we have to set the correct kind for the corresponding forall binder.
966 EnumRowsF::TailVar(id) => {
967 if let Some(cell) = bound_vars.get(&id.ident()) {
968 cell.try_set(VarKind::EnumRows {
969 excluded: maybe_excluded,
970 })
971 .map_err(|_| ParseError::TypeVariableKindMismatch { ty_var: id, span })?;
972 }
973
974 Ok(None)
975 }
976 EnumRowsF::Extend { ref row, tail } => {
977 // Enum tags (when `typ` is `None`) can't create a conflict, so we ignore them
978 // for constraints. See the documentation of `typecheck::unif::RowConstrs`.
979 if row.typ.is_some() {
980 maybe_excluded.insert(row.id.ident());
981 }
982
983 let row_fixed = row.fix_type_vars_env(alloc, bound_vars.clone(), span)?;
984 let tail_fixed = do_fix(tail, alloc, bound_vars, span, maybe_excluded)?;
985
986 if row_fixed.is_some() || tail_fixed.is_some() {
987 let row = row_fixed.unwrap_or_else(|| row.clone());
988 let tail = tail_fixed
989 .map(|tail_fixed| alloc.alloc(tail_fixed))
990 .unwrap_or(tail);
991
992 Ok(Some(EnumRows(EnumRowsF::Extend { row, tail })))
993 } else {
994 Ok(None)
995 }
996 }
997 }
998 }
999
1000 do_fix(self, alloc, bound_vars, span, HashSet::new())
1001 }
1002}
1003
1004impl<'ast> FixTypeVars<'ast> for EnumRow<'ast> {
1005 fn fix_type_vars_env(
1006 &self,
1007 alloc: &'ast AstAlloc,
1008 bound_vars: BoundVarEnv,
1009 span: RawSpan,
1010 ) -> Result<Option<Self>, ParseError> {
1011 // `maybe_fixed` is `Some(ty)` if and only if this enum rows has an associated
1012 // type *and* the type has been changed by fixing.
1013 let maybe_fixed = self
1014 .typ
1015 .as_ref()
1016 .map(|ty| {
1017 // Enum tags (when `typ` is `None`) can't create a conflict, so we ignore them
1018 // for constraints. See the documentation of `typecheck::unif::RowConstrs`.
1019 ty.fix_type_vars_env(alloc, bound_vars.clone(), span)
1020 })
1021 .transpose()?
1022 .flatten();
1023
1024 Ok(maybe_fixed.map(|typ| EnumRow {
1025 id: self.id,
1026 typ: Some(alloc.alloc(typ)),
1027 }))
1028 }
1029}
1030
1031/// Fix the type variables of types appearing as annotations of record fields. See the in-code
1032/// documentation of the private symbol `Types::fix_type_vars`.
1033pub fn fix_field_types<'ast>(
1034 alloc: &'ast AstAlloc,
1035 metadata: FieldMetadata<'ast>,
1036 span: RawSpan,
1037) -> Result<FieldMetadata<'ast>, ParseError> {
1038 use std::borrow::Cow;
1039
1040 let typ = metadata
1041 .annotation
1042 .typ
1043 .map(|typ| typ.fix_type_vars(alloc, span))
1044 .transpose()?;
1045
1046 let contracts: Result<Vec<Cow<'ast, _>>, ParseError> = metadata
1047 .annotation
1048 .contracts
1049 .iter()
1050 .map(|ctr| {
1051 Ok(ctr
1052 .fix_type_vars_ref(alloc, span)?
1053 .map(Cow::Owned)
1054 .unwrap_or(Cow::Borrowed(ctr)))
1055 })
1056 .collect();
1057 let contracts = contracts?;
1058
1059 // If none of the contracts have been changed, we can keep the original `[Type]` allocation.
1060 let contracts = if contracts.iter().all(|cow| matches!(cow, Cow::Borrowed(_))) {
1061 metadata.annotation.contracts
1062 } else {
1063 alloc.alloc_many(contracts.into_iter().map(|cow| cow.into_owned()))
1064 };
1065
1066 Ok(FieldMetadata {
1067 annotation: Annotation { typ, contracts },
1068 ..metadata
1069 })
1070}