hashcons/
merkle.rs

1use std::rc::Rc;
2use std::fmt::{self,Debug};
3use std::ops::Deref;
4use serde::de::*;
5use serde::ser::*;
6use std::hash::{Hash,Hasher};
7use std::collections::hash_map::DefaultHasher;
8use std::collections::HashMap;
9use std::cell::RefCell;
10use core::any::Any;
11
12pub type Id = u64;
13
14/// A shared instance of T that serializes once, not once per reference.
15///
16/// Unlike a "bare" Rc<T>, a Merkle<T> enjoys the practical property
17/// that, when a structure holding multiple (shared) instances of T is
18/// serialized, this serialized output holds only _one_ occurrence of
19/// a T's serialized representation; the other occurrences merely
20/// consist of the T's unique identifier (the serialization of an
21/// `Id`, single machine word on modern machines).
22///
23/// In particular, a shared T has a unique ID permitting table-based
24/// indirection, via temporary storage used by serialization and
25/// serialization logic; by contrast, a bare Rc<T> lacks this
26/// indirection, and thus, it lacks a compact serialized
27/// representation for structures with abundant sharing. Generally,
28/// abundant sharing via many shared Rc<_>s leads to exponential "blow
29/// up" in terms of serialized space and time.
30///
31#[derive(Clone)]
32pub struct Merkle<T> {
33    ptr:MerklePtr<T>
34}
35
36#[derive(Serialize, Deserialize, Clone)]
37enum MerklePtr<T> {
38    /// Outside of serialized representations of a Merkle<T>, the
39    /// constructor Rc is the only valid inhabitant of this type.
40    Rc(Id, Rc<T>),
41
42    /// The Copy constructor only appears in the _serialized
43    /// representations_ of a Merkle<T> instance; it _never_ occurs in
44    /// the deserialized, in-memory versions of this type.  We rely on
45    /// this invariant to avoid keeping around a lookup table after
46    /// deserialization, and to avoid doing table lookups for any
47    /// deref of a MerklePtr<_>.
48    Copy(Id),
49}
50
51impl<T:Hash+'static> Merkle<T> {
52    pub fn id(&self) -> Id {
53        match self.ptr {
54            MerklePtr::Rc(id, _) => id.clone(),
55            MerklePtr::Copy(id)  => id.clone(),
56        }
57    }
58    pub fn new(t:T) -> Merkle<T> {
59        let mut hasher = DefaultHasher::new();
60        t.hash(&mut hasher);
61        let id = hasher.finish();
62        Merkle{ptr:MerklePtr::Rc(id, Rc::new(t))}
63    }
64    pub fn from_rc(rc:Rc<T>) -> Merkle<T> {
65        let mut hasher = DefaultHasher::new();
66        rc.hash(&mut hasher);
67        let id = hasher.finish();
68        Merkle{ptr:MerklePtr::Rc(id, rc)}
69    }
70}
71
72impl<T:PartialEq+'static> PartialEq for Merkle<T> {
73    fn eq(&self, other:&Self) -> bool {
74        match (&self.ptr, &other.ptr) {
75            (&MerklePtr::Rc(ref id1, ref rc1),
76             &MerklePtr::Rc(ref id2, ref rc2)) => {
77                if true {
78                    // Shallow O(1) comparison, via unique IDs.  This
79                    // is "sound" to the extent that hashing avoids
80                    // collisions.  If you feel paranoid, follow the
81                    // other implementation, which compares the
82                    // content of the two Rc<_>s.
83                    id1 == id2
84                } else {
85                    rc1 == rc2
86                }
87            },
88            _ => unreachable!()
89        }
90    }
91}
92impl<T:PartialEq+'static> Eq for Merkle<T> { }
93
94impl<T:'static+Hash> Hash for Merkle<T> {
95    fn hash<H>(&self, state: &mut H) where H: Hasher {
96        // The Merkle-tree-like structure of a MerklePtr<T> avoids
97        // "deep" hashing operations; instead, hashing merely
98        // re-hashes the pre-computed (unique) Id of this deep
99        // structure.
100        self.id().hash(state)
101    }
102}
103
104impl<T:Debug> fmt::Debug for Merkle<T> {
105    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
106        match self.ptr {
107            MerklePtr::Rc(_, ref rc) => rc.fmt(f),
108            MerklePtr::Copy(_) => unreachable!()
109        }        
110    }
111}
112
113/// We may deference a (deserialized) Merkle<T> just like an Rc<T>;
114/// below, rely on the invariant that all deserialized Merkle<T>'s
115/// consist of an Rc<T> (along with a unique ID).
116impl<T> Deref for Merkle<T> {
117    type Target = T;
118    fn deref(&self) -> &T {
119        match self.ptr {
120            MerklePtr::Rc(_, ref rc) => &*rc,
121            MerklePtr::Copy(_) => unreachable!(),
122        }
123    }
124}
125
126impl<T:Serialize+Hash+'static> Serialize for Merkle<T> {
127    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
128    where
129        S: Serializer,
130    {
131        // Check to see if self.id has already been serialized; if so,
132        // do not serialize yet another copy; instead, serialize a
133        // Copy<T> with the same ID.
134        let orc : Option<Rc<T>> = table_get(&self.id());
135        match (&self.ptr, orc) {
136            (&MerklePtr::Copy(_), _) => unreachable!(),
137            (&MerklePtr::Rc(ref id, ref rc), None) => {
138                table_put(id.clone(), rc.clone());
139                self.ptr.serialize(serializer)
140            }
141            (&MerklePtr::Rc(ref id, ref _rc1), Some(ref _rc2)) => {
142                table_inc_copy_count();
143                let ptr_copy:MerklePtr<T> = MerklePtr::Copy(id.clone());
144                ptr_copy.serialize(serializer)
145            }
146        }
147    }
148}
149impl<'de,T:Deserialize<'de>+Hash+'static> Deserialize<'de> for Merkle<T> {
150    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
151    where
152        D: Deserializer<'de>,
153    {
154        match MerklePtr::<T>::deserialize(deserializer) {
155            Ok(MerklePtr::Copy(id)) => {
156                match table_get(&id) {
157                    None => unreachable!(),
158                    Some(rc) => {
159                        table_inc_copy_count();
160                        Ok(Merkle{ptr:MerklePtr::Rc(id, rc)})
161                    }
162                }
163            }
164            Ok(MerklePtr::Rc(id, rc)) => {
165                table_put(id.clone(), rc.clone());
166                Ok(Merkle{ptr:MerklePtr::Rc(id, rc)})
167            },
168            Err(err) => Err(err),
169        }
170    }
171}
172
173/////////////////////////////////////////////////////////////////////////////////////
174
175struct Table {
176    copy_count:usize,
177    table:HashMap<Id,Box<Rc<dyn Any>>>
178}
179
180// Global table of serialized objects; permits us to avoid multiple
181// serialized copies of a single, shared object.
182thread_local!(static TABLE:
183              RefCell<Table> =
184              RefCell::new(Table{
185                  copy_count:0,
186                  table:HashMap::new()
187              }));
188
189/// Put a reference-counted object into the table of serialized objects
190fn table_put<T:Any+'static>(id:Id, x:Rc<T>) {
191    TABLE.with(|t| {
192        drop(t.borrow_mut().table.insert(id, Box::new(x)))
193    })
194}
195
196/// Increment the copy count associated with the table; used by
197/// regression tests and other diagnostics.
198fn table_inc_copy_count() {
199    TABLE.with(|t| {
200        t.borrow_mut().copy_count += 1;
201    })    
202}
203
204/// Get a reference-counted object from the table of serialized objects
205///
206/// For documentation for rc_downcast feature, see this:
207/// https://github.com/rust-lang/rust/blob/71d3dac4a86d192c2c80948621859da3b363fa50/src/liballoc/rc.rs#L621
208///
209fn table_get<T:'static>(id:&Id) -> Option<Rc<T>> {
210    TABLE.with(|t| {
211        match t.borrow().table.get(id) {
212            Some(ref brc) => {
213                let x : &Rc<dyn Any> = &**brc;
214                let y : Result<Rc<T>, Rc<dyn Any>> = (x.clone()).downcast::<T>();
215                match y {
216                    Err(_) => {
217                        panic!("downcast failed for id {:?}", id)
218                    }
219                    Ok(ref rc) => Some((*rc).clone())
220                }
221            }
222            None => None,
223        }
224    })
225}
226
227/// Reclaim the space used to serialize large structures.  Returns the
228/// "copy count" of the table.
229///
230/// We use this "copy count" for regression tests, to ensure that we
231/// get the compactness that we expect in these tests.
232///
233/// This `clear` operation is essential for memory-sensitive programs
234/// that dump their structures to external storage: when these
235/// serialized structures are no longer needed by the Rust program,
236/// their reference count will not drop to zero without first using
237/// this operation.
238///
239pub fn clear() -> usize {
240    let copy_count = 
241        TABLE.with(|t| {
242            let c = t.borrow().copy_count;
243            t.borrow_mut().table.clear();
244            t.borrow_mut().copy_count = 0;
245            c
246        });
247    copy_count
248}
249
250
251//////////////////////////////////////////////////////////////////////////////
252
253mod list_example {
254    use super::Merkle;
255   
256    #[derive(Hash,Clone,Debug,PartialEq,Eq,Serialize,Deserialize)]
257    enum List {
258        Nil,
259        Cons(usize, Merkle<List>)
260    }
261    
262    fn nil() -> List {
263        List::Nil
264    }
265
266    fn cons(h:usize, t:List) -> List {
267        List::Cons(h, Merkle::new(t))
268    }
269
270    #[allow(unused)]
271    fn sum(l:&List) -> usize {
272        match *l {
273            List::Nil => 0,
274            List::Cons(ref h, ref t) => {
275                h + sum(&*t)
276            }
277        }
278    }
279
280    #[allow(unused)]
281    fn from_vec(v:&Vec<usize>) -> List {
282        let mut l = nil();
283        for x in v.iter() {
284            l = cons(*x, l);
285        }
286        return l
287    }
288
289    #[test]
290    fn test_elim_forms() {
291        let x = from_vec(&vec![1,2,3]);
292        assert_eq!(1+2+3, sum(&x))
293    }
294    
295    #[test]
296    fn test_intro_forms() {
297        let x = nil();
298        let x = cons(1, x);
299        let y = cons(2, x.clone());
300        let z = cons(3, x.clone());
301        drop((x,y,z))
302    }
303
304    #[test]
305    fn test_serde() {
306        use serde_json;
307        let (value, expected_copy_count) = {
308            let x = nil();
309            let x = cons(1, x);
310            let y = cons(2, x.clone());
311            let z = cons(3, x.clone());
312            ((x,y,z), 2)
313        };
314               
315        let serialized = serde_json::to_string(&value).unwrap();
316        let copy_count1 = super::clear();
317            
318        println!("serialized = {}", serialized);
319        println!("copy_count1 = {}", copy_count1);        
320        assert_eq!(copy_count1, expected_copy_count);
321        
322        let deserialized: (List,List,List) =
323            serde_json::from_str(&serialized[..]).unwrap();
324        let copy_count2 = super::clear();
325        
326        println!("copy_count2 = {}", copy_count2);
327        assert_eq!(copy_count2, expected_copy_count);
328        
329        assert_eq!(deserialized, value);
330    }
331}