hindley_milner/
lib.rs

1#[macro_use] extern crate maplit;
2#[macro_use] extern crate lazy_static;
3
4use std::collections::{HashMap, HashSet};
5use std::fmt;
6
7#[derive(Debug, PartialEq, Eq, Clone)]
8pub enum Syntax {
9    Lambda {
10        v: String,
11        body: Box<Syntax>,
12    },
13    Identifier {
14        name: String,
15    },
16    Apply {
17        func: Box<Syntax>,
18        arg: Box<Syntax>,
19    },
20    Let {
21        v: String,
22        defn: Box<Syntax>,
23        body: Box<Syntax>,
24    },
25    Letrec {
26        v: String,
27        defn: Box<Syntax>,
28        body: Box<Syntax>,
29    },
30}
31
32impl fmt::Display for Syntax {
33    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
34        use Syntax::*;
35        match self {
36            &Lambda { ref v, ref body } => {
37                write!(f, "(fn {v} => {body})", v=v, body=body)
38            }
39            &Identifier { ref name } => {
40                write!(f, "{}", name)
41            }
42            &Apply { ref func, ref arg } => {
43                write!(f, "({func} {arg})", func=func, arg=arg)
44            }
45            &Let { ref v, ref defn, ref body } => {
46                write!(f, "(let {v} = {defn} in {body})", v=v, defn=defn, body=body)
47            }
48            &Letrec { ref v, ref defn, ref body } => {
49                write!(f, "(letrec {v} = {defn} in {body})", v=v, defn=defn, body=body)
50            }
51        }
52    }
53}
54
55pub enum Errors {
56    InferenceError(String),
57    ParseError(String),
58}
59
60// Types and type constructors
61
62pub type ArenaType = usize;
63
64#[derive(Debug, Clone, PartialEq, Eq, Hash)]
65pub enum Type {
66    Variable {
67        id: ArenaType,
68        instance: Option<ArenaType>,
69    },
70    Operator {
71        id: ArenaType,
72        name: String,
73        types: Vec<ArenaType>,
74    }
75}
76
77struct Namer {
78    value: char,
79    set: HashMap<ArenaType, String>,
80}
81
82impl Namer {
83    fn next(&mut self) -> String {
84        let v = self.value;
85        self.value = ((self.value as u8) + 1) as char;
86        format!("{}", v)
87    }
88
89    fn name(&mut self, t: ArenaType) -> String {
90        let k = {
91            self.set.get(&t).map(|x| x.clone())
92        };
93        if let Some(val) = k {
94            val.clone()
95        } else {
96            let v = self.next();
97            self.set.insert(t, v.clone());
98            v
99        }
100    }
101}
102
103/// A type variable standing for an arbitrary type.
104///
105/// All type variables have a unique id, but names are
106/// only assigned lazily, when required.
107
108impl Type {
109    fn new_variable(idx: ArenaType) -> Type {
110        Type::Variable {
111            id: idx,
112            instance: None,
113        }
114    }
115
116    fn new_operator(idx: ArenaType, name: &str, types: &[ArenaType]) -> Type {
117        Type::Operator {
118            id: idx,
119            name: name.to_string(),
120            types: types.to_vec(),
121        }
122    }
123
124    fn id(&self) -> usize {
125        match self {
126            &Type::Variable { id, .. } => {
127                id
128            }
129            &Type::Operator { id, .. } => {
130                id
131            }
132        }
133    }
134
135    fn set_instance(&mut self, instance: ArenaType) {
136        match self {
137            &mut Type::Variable { instance: ref mut inst, .. } => {
138                *inst = Some(instance);
139            }
140            _ => {
141                unimplemented!()
142            }
143        }
144    }
145
146    fn as_string(&self, a: &Vec<Type>, namer: &mut Namer) -> String {
147        match self {
148            &Type::Variable { instance: Some(inst), .. } => {
149                a[inst].as_string(a, namer)
150            },
151            &Type::Variable { .. } => {
152                namer.name(self.id())
153            },
154            &Type::Operator { ref types, ref name, .. } => {
155                match types.len() {
156                    0 => name.clone(),
157                    2 => {
158                        let l = a[types[0]].as_string(a, namer);
159                        let r = a[types[1]].as_string(a, namer);
160                        format!("({} {} {})", l, name, r)
161                    },
162                    _ => {
163                        let mut coll = vec![];
164                        for v in types {
165                            coll.push(a[*v].as_string(a, namer));
166                        }
167                        format!("{} {}", name, coll.join(" "))
168                    },
169                }
170            }
171        }
172    }
173}
174
175//impl fmt::Debug for Type {
176//    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
177//        match self {
178//            write!(f, "TypeVariable(id = {})", self.id)
179//            write!(f, "TypeOperator(name, )", self.id)
180//        }
181//    }
182//}
183
184/// A binary type constructor which builds function types
185pub fn new_function(a: &mut Vec<Type>, from_type: ArenaType, to_type: ArenaType) -> ArenaType {
186    let t = Type::new_operator(a.len(), "->", &[from_type, to_type]);
187    a.push(t);
188    a.len() - 1
189}
190
191/// A binary type constructor which builds function types
192pub fn new_variable(a: &mut Vec<Type>) -> ArenaType {
193    let t = Type::new_variable(a.len());
194    a.push(t);
195    a.len() - 1
196}
197
198/// A binary type constructor which builds function types
199pub fn new_operator(a: &mut Vec<Type>, name: &str, types: &[ArenaType]) -> ArenaType {
200    let t = Type::new_operator(a.len(), name, types);
201    a.push(t);
202    a.len() - 1
203}
204
205// Basic types are constructed with a nullary type constructor
206lazy_static! {
207    // Basic integer
208    static ref INTEGER: Type = Type::new_operator(0, "int", &[]);
209    // Basic bool
210    static ref BOOL: Type = Type::new_operator(1, "bool", &[]);
211}
212
213// Type inference machinery
214
215#[derive(Clone, Debug)]
216pub struct Env(HashMap<String, ArenaType>);
217
218/// Computes the type of the expression given by node.
219///
220/// The type of the node is computed in the context of the
221/// supplied type environment env. Data types can be introduced into the
222/// language simply by having a predefined set of identifiers in the initial
223/// environment. environment; this way there is no need to change the syntax or, more
224/// importantly, the type-checking program when extending the language.
225///
226/// Args:
227///     node: The root of the abstract syntax tree.
228///     env: The type environment is a mapping of expression identifier names
229///         to type assignments.
230///     non_generic: A set of non-generic variables, or None
231///
232/// Returns:
233///     The computed type of the expression.
234///
235/// Raises:
236///     InferenceError: The type of the expression could not be inferred, for example
237///         if it is not possible to unify two types such as Integer and Bool
238///     ParseError: The abstract syntax tree rooted at node could not be parsed
239pub fn analyse(a: &mut Vec<Type>, node: &Syntax, env: &Env, non_generic: &HashSet<ArenaType>) -> ArenaType {
240    use Syntax::*;
241    match node {
242        &Identifier { ref name } => {
243            get_type(a, name, env, non_generic)
244        }
245        &Apply { ref func, ref arg } => {
246            let fun_type = analyse(a, func, env, non_generic);
247            let arg_type = analyse(a, arg, env, non_generic);
248            let result_type = new_variable(a);
249            let first = new_function(a, arg_type, result_type.clone());
250            unify(a, first, fun_type);
251            result_type
252        }
253        &Lambda { ref v, ref body } => {
254            let arg_type = new_variable(a);
255            let mut new_env = env.clone();
256            new_env.0.insert(v.clone(), arg_type);
257            let mut new_non_generic = non_generic.clone();
258            new_non_generic.insert(arg_type.clone());
259            let result_type = analyse(a, body, &new_env, &new_non_generic);
260            new_function(a, arg_type, result_type)
261        }
262        &Let { ref defn, ref v, ref body } => {
263            let defn_type = analyse(a, defn, env, non_generic);
264            let mut new_env = env.clone();
265            new_env.0.insert(v.clone(), defn_type);
266            analyse(a, body, &new_env, non_generic)
267        }
268        &Letrec { ref defn, ref v, ref body } => {
269            let new_type = new_variable(a);
270            let mut new_env = env.clone();
271            new_env.0.insert(v.clone(), new_type.clone());
272            let mut new_non_generic = non_generic.clone();
273            new_non_generic.insert(new_type.clone());
274            let defn_type = analyse(a, defn, &new_env, &new_non_generic);
275            unify(a, new_type, defn_type);
276            analyse(a, body, &new_env, non_generic)
277        }
278    }
279}
280
281
282/// Get the type of identifier name from the type environment env.
283///
284///     Args:
285///         name: The identifier name
286///         env: The type environment mapping from identifier names to types
287///         non_generic: A set of non-generic TypeVariables
288///
289///     Raises:
290///         ParseError: Raised if name is an undefined symbol in the type
291///             environment.
292fn get_type(a: &mut Vec<Type>, name: &str, env: &Env, non_generic: &HashSet<ArenaType>) -> ArenaType {
293    if let Some(value) = env.0.get(name) {
294        let mat = non_generic.iter().cloned().collect::<Vec<_>>();
295        fresh(a, *value, &mat)
296    } else if is_integer_literal(name) {
297        0 //INTEGER.id
298    } else {
299        //raise ParseError("Undefined symbol {0}".format(name))
300        panic!("Undefined symbol {:?}", name);
301    }
302}
303
304/// Makes a copy of a type expression.
305///
306///     The type t is copied. The the generic variables are duplicated and the
307///     non_generic variables are shared.
308///
309///     Args:
310///         t: A type to be copied.
311///         non_generic: A set of non-generic TypeVariables
312fn fresh(a: &mut Vec<Type>, t: ArenaType, non_generic: &[ArenaType]) -> ArenaType {
313    // A mapping of TypeVariables to TypeVariables
314    let mut mappings = hashmap![];
315
316    fn freshrec(a: &mut Vec<Type>, tp: ArenaType, mappings: &mut HashMap<ArenaType, ArenaType>, non_generic: &[ArenaType]) -> ArenaType {
317        let p = prune(a, tp);
318        match a.get(p).unwrap().clone() {
319            Type::Variable { .. } => {
320                if is_generic(a, p, non_generic) {
321                    mappings.entry(p)
322                        .or_insert(new_variable(a))
323                        .clone()
324                } else {
325                    p
326                }
327            }
328            Type::Operator { ref name, ref types, .. } => {
329                let b = types.iter().map(|x| freshrec(a, *x, mappings, non_generic)).collect::<Vec<_>>();
330                new_operator(a, name, &b)
331            }
332        }
333    }
334
335    freshrec(a, t, &mut mappings, non_generic)
336}
337
338
339/// Unify the two types t1 and t2.
340///
341///     Makes the types t1 and t2 the same.
342///
343///     Args:
344///         t1: The first type to be made equivalent
345///         t2: The second type to be be equivalent
346///
347///     Returns:
348///         None
349///
350///     Raises:
351///         InferenceError: Raised if the types cannot be unified.
352fn unify(alloc: &mut Vec<Type>, t1: ArenaType, t2: ArenaType) {
353    let a = prune(alloc, t1);
354    let b = prune(alloc, t2);
355    match (alloc.get(a).unwrap().clone(), alloc.get(b).unwrap().clone()) {
356        (Type::Variable { .. }, _) => {
357            if a != b {
358                if occurs_in_type(alloc, a, b) {
359                    // raise InferenceError("recursive unification")
360                    panic!("recursive unification");
361                }
362                alloc.get_mut(a).unwrap().set_instance(b);
363            }
364        }
365        (Type::Operator { .. }, Type::Variable { .. }) => {
366            unify(alloc, b, a)
367        }
368        (Type::Operator { name: ref a_name, types: ref a_types, .. },
369            Type::Operator { name: ref b_name, types: ref b_types, .. }) => {
370            if a_name != b_name || a_types.len() != b_types.len() {
371                //raise InferenceError("Type mismatch: {0} != {1}".format(str(a), str(b)))
372                panic!("type mismatch");
373            }
374            for (p, q) in a_types.iter().zip(b_types.iter()) {
375                unify(alloc, *p, *q);
376            }
377        }
378    }
379}
380
381
382/// Returns the currently defining instance of t.
383///
384///     As a side effect, collapses the list of type instances. The function Prune
385///     is used whenever a type expression has to be inspected: it will always
386///     return a type expression which is either an uninstantiated type variable or
387///     a type operator; i.e. it will skip instantiated variables, and will
388///     actually prune them from expressions to remove long chains of instantiated
389///     variables.
390///
391///     Args:
392///         t: The type to be pruned
393///
394///     Returns:
395///         An uninstantiated TypeVariable or a TypeOperator
396fn prune(a: &mut Vec<Type>, t: ArenaType) -> ArenaType {
397    let v2 = match a.get(t).unwrap() {
398        //TODO screwed up
399        &Type::Variable { instance, .. } => {
400            if let Some(value) = instance {
401                value
402            } else {
403                return t;
404            }
405        }
406        _ => {
407            return t;
408        }
409    };
410
411    let value = prune(a, v2);
412    match a.get_mut(t).unwrap() {
413        //TODO screwed up
414        &mut Type::Variable { ref mut instance, .. } => {
415            *instance = Some(value);
416        }
417        _ => {
418            return t;
419        }
420    }
421    value
422}
423
424
425/// Checks whether a given variable occurs in a list of non-generic variables
426///
427///     Note that a variables in such a list may be instantiated to a type term,
428///     in which case the variables contained in the type term are considered
429///     non-generic.
430///
431///     Note: Must be called with v pre-pruned
432///
433///     Args:
434///         v: The TypeVariable to be tested for genericity
435///         non_generic: A set of non-generic TypeVariables
436///
437///     Returns:
438///         True if v is a generic variable, otherwise False
439fn is_generic(a: &mut Vec<Type>, v: ArenaType, non_generic: &[ArenaType]) -> bool {
440    !occurs_in(a, v, non_generic)
441}
442
443
444/// Checks whether a type variable occurs in a type expression.
445///
446///     Note: Must be called with v pre-pruned
447///
448///     Args:
449///         v:  The TypeVariable to be tested for
450///         type2: The type in which to search
451///
452///     Returns:
453///         True if v occurs in type2, otherwise False
454fn occurs_in_type(a: &mut Vec<Type>, v: ArenaType, type2: ArenaType) -> bool {
455    let pruned_type2 = prune(a, type2);
456    if pruned_type2 == v {
457        return true;
458    }
459    match a.get(pruned_type2).unwrap().clone() {
460        Type::Operator { ref types, .. } => {
461            occurs_in(a, v, types)
462        }
463        _ => false
464    }
465}
466
467
468/// Checks whether a types variable occurs in any other types.
469///
470/// Args:
471///     t:  The TypeVariable to be tested for
472///     types: The sequence of types in which to search
473///
474/// Returns:
475///     True if t occurs in any of types, otherwise False
476///
477fn occurs_in(a: &mut Vec<Type>, t: ArenaType, types: &[ArenaType]) -> bool {
478    for t2 in types.iter() {
479        if occurs_in_type(a, t, *t2) {
480            return true;
481        }
482    }
483    return false;
484}
485
486/// Checks whether name is an integer literal string.
487///
488/// Args:
489///     name: The identifier to check
490///
491/// Returns:
492///     True if name is an integer literal, otherwise False
493fn is_integer_literal(name: &str) -> bool {
494    name.parse::<isize>().is_ok()
495}
496
497
498//=====================================================
499
500
501pub fn new_lambda(v: &str, body: Syntax) -> Syntax {
502    Syntax::Lambda {
503        v: v.to_string(),
504        body: Box::new(body),
505    }
506}
507
508pub fn new_apply(func: Syntax, arg: Syntax) -> Syntax {
509    Syntax::Apply {
510        func: Box::new(func),
511        arg: Box::new(arg),
512    }
513}
514
515pub fn new_let(v: &str, defn: Syntax, body: Syntax) -> Syntax {
516    Syntax::Let {
517        v: v.to_string(),
518        defn: Box::new(defn),
519        body: Box::new(body),
520    }
521}
522
523pub fn new_letrec(v: &str, defn: Syntax, body: Syntax) -> Syntax {
524    Syntax::Letrec {
525        v: v.to_string(),
526        defn: Box::new(defn),
527        body: Box::new(body),
528    }
529}
530
531pub fn new_identifier(name: &str) -> Syntax {
532    Syntax::Identifier {
533        name: name.to_string(),
534    }
535}
536
537fn test_env() -> (Vec<Type>, Env) {
538    let mut a = vec![
539        INTEGER.clone(),
540        BOOL.clone(),
541    ];
542    let var1 = new_variable(&mut a);
543    let var2 = new_variable(&mut a);
544    let pair_type = new_operator(&mut a, "*", &[var1, var2]);
545
546    let var3 = new_variable(&mut a);
547
548    let my_env = Env(hashmap![
549        "pair".to_string() => {
550            let right = new_function(&mut a, var2, pair_type);
551            new_function(&mut a, var1, right)
552        },
553        "true".to_string() => 1,
554        "cond".to_string() => {
555            let right = new_function(&mut a, var3, var3);
556            let right = new_function(&mut a, var3, right);
557            new_function(&mut a, 1, right)
558        },
559        "zero".to_string() => new_function(&mut a, 0, 1),
560        "pred".to_string() => new_function(&mut a, 0, 0),
561        "times".to_string() => {
562            let right = new_function(&mut a, 0, 0);
563            new_function(&mut a, 0, right)
564        },
565    ]);
566
567    (a, my_env)
568}
569
570/// Sets up some predefined types using the type constructors TypeVariable,
571/// TypeOperator and Function.  Creates a list of example expressions to be
572/// evaluated. Evaluates the expressions, printing the type or errors arising
573/// from each.
574
575#[test]
576fn test_factorial() {
577    let (mut a, my_env) = test_env();
578
579    // factorial
580    let syntax = new_letrec("factorial",  // letrec factorial =
581           new_lambda("n",  // fn n =>
582                  new_apply(
583                      new_apply(  // cond (zero n) 1
584                          new_apply(new_identifier("cond"),  // cond (zero n)
585                                new_apply(new_identifier("zero"), new_identifier("n"))),
586                          new_identifier("1")),
587                      new_apply(  // times n
588                          new_apply(new_identifier("times"), new_identifier("n")),
589                          new_apply(new_identifier("factorial"),
590                                new_apply(new_identifier("pred"), new_identifier("n")))
591                      )
592                  )
593                  ),  // in
594           new_apply(new_identifier("factorial"), new_identifier("5"))
595       );
596
597    let t = analyse(&mut a, &syntax, &my_env, &hashset![]);
598    assert_eq!(a[t].as_string(&a, &mut Namer {
599        value: 'a',
600        set: hashmap![],
601    }), r#"int"#);
602}
603
604#[should_panic]
605#[test]
606fn test_mismatch() {
607    let (mut a, my_env) = test_env();
608
609    // fn x => (pair(x(3) (x(true)))
610    let syntax = new_lambda("x",
611       new_apply(
612           new_apply(new_identifier("pair"),
613                 new_apply(new_identifier("x"), new_identifier("3"))),
614           new_apply(new_identifier("x"), new_identifier("true"))));
615
616    let _ = analyse(&mut a, &syntax, &my_env, &hashset![]);
617}
618
619#[should_panic]
620#[test]
621fn test_pair() {
622    let (mut a, my_env) = test_env();
623
624    // pair(f(3), f(true))
625    let syntax = new_apply(
626        new_apply(new_identifier("pair"), new_apply(new_identifier("f"), new_identifier("4"))),
627        new_apply(new_identifier("f"), new_identifier("true")));
628
629    let _ = analyse(&mut a, &syntax, &my_env, &hashset![]);
630}
631
632#[test]
633fn test_mul() {
634    let (mut a, my_env) = test_env();
635
636    let pair = new_apply(new_apply(new_identifier("pair"),
637                       new_apply(new_identifier("f"),
638                             new_identifier("4"))),
639                 new_apply(new_identifier("f"),
640                       new_identifier("true")));
641
642    // let f = (fn x => x) in ((pair (f 4)) (f true))
643    let syntax = new_let("f", new_lambda("x", new_identifier("x")), pair);
644
645    let t = analyse(&mut a, &syntax, &my_env, &hashset![]);
646    assert_eq!(a[t].as_string(&a, &mut Namer {
647        value: 'a',
648        set: hashmap![],
649    }), r#"(int * bool)"#);
650}
651
652#[should_panic]
653#[test]
654fn test_recursive() {
655    let (mut a, my_env) = test_env();
656
657    // fn f => f f (fail)
658    let syntax = new_lambda("f", new_apply(new_identifier("f"), new_identifier("f")));
659
660    let t = analyse(&mut a, &syntax, &my_env, &hashset![]);
661    assert_eq!(a[t].as_string(&a, &mut Namer {
662        value: 'a',
663        set: hashmap![],
664    }), r#"int"#);
665}
666
667#[test]
668fn test_int() {
669    let (mut a, my_env) = test_env();
670
671    // let g = fn f => 5 in g g
672    let syntax = new_let("g",
673        new_lambda("f", new_identifier("5")),
674        new_apply(new_identifier("g"), new_identifier("g")));
675
676    let t = analyse(&mut a, &syntax, &my_env, &hashset![]);
677    assert_eq!(a[t].as_string(&a, &mut Namer {
678        value: 'a',
679        set: hashmap![],
680    }), r#"int"#);
681}
682
683
684#[test]
685fn test_generic_nongeneric() {
686    let (mut a, my_env) = test_env();
687
688    // example that demonstrates generic and non-generic variables:
689    // fn g => let f = fn x => g in pair (f 3, f true)
690    let syntax = new_lambda("g",
691           new_let("f",
692               new_lambda("x", new_identifier("g")),
693               new_apply(
694                   new_apply(new_identifier("pair"),
695                         new_apply(new_identifier("f"), new_identifier("3"))
696                         ),
697                   new_apply(new_identifier("f"), new_identifier("true")))));
698
699    let t = analyse(&mut a, &syntax, &my_env, &hashset![]);
700    assert_eq!(a[t].as_string(&a, &mut Namer {
701        value: 'a',
702        set: hashmap![],
703    }), r#"(a -> (a * a))"#);
704}
705
706
707#[test]
708fn test_composition() {
709    let (mut a, my_env) = test_env();
710
711    // Function composition
712    // fn f (fn g (fn arg (f g arg)))
713    let syntax = new_lambda("f", new_lambda("g", new_lambda("arg", new_apply(new_identifier("g"), new_apply(new_identifier("f"), new_identifier("arg"))))));
714
715    let t = analyse(&mut a, &syntax, &my_env, &hashset![]);
716    assert_eq!(a[t].as_string(&a, &mut Namer {
717        value: 'a',
718        set: hashmap![],
719    }), r#"((a -> b) -> ((b -> c) -> (a -> c)))"#);
720}
721
722
723#[test]
724fn test_fun() {
725    let (mut a, my_env) = test_env();
726
727    // Function composition
728    // fn f (fn g (fn arg (f g arg)))
729    let syntax = new_lambda("f", new_lambda("g", new_lambda("arg", new_apply(new_identifier("g"), new_apply(new_identifier("f"), new_identifier("arg"))))));
730
731    let t = analyse(&mut a, &syntax, &my_env, &hashset![]);
732    assert_eq!(a[t].as_string(&a, &mut Namer {
733        value: 'a',
734        set: hashmap![],
735    }), r#"((a -> b) -> ((b -> c) -> (a -> c)))"#);
736}