hdi/hash_path/
path.rs

1use crate::hash_path::shard::ShardStrategy;
2use crate::hash_path::shard::SHARDEND;
3use crate::prelude::*;
4use holochain_integrity_types::link::LinkTag;
5use holochain_wasmer_guest::*;
6use std::str::FromStr;
7
8/// Root for all paths.
9pub const ROOT: &[u8; 2] = &[0x00, 0x01];
10
11pub fn root_hash() -> ExternResult<AnyLinkableHash> {
12    hash_entry(Entry::App(
13        AppEntryBytes::try_from(SerializedBytes::from(UnsafeBytes::from(ROOT.to_vec())))
14            .expect("This cannot fail as it's under the max entry bytes"),
15    ))
16    .map(Into::into)
17}
18
/// Separator between components in the string form of a path.
///
/// Splitting on it lets "foo.bar.baz" automatically move to/from the components
/// ["foo", "bar", "baz"]. Technically each string component is moved in as bytes.
/// If this is a problem for you simply build the components yourself as a `Vec<Vec<u8>>`.
///
/// See `impl From<&str> for Path` below.
pub const DELIMITER: &str = ".";
25
26/// Each path component is arbitrary bytes to be hashed together in a predictable way when the path
27/// is hashed to create something that can be linked and discovered by all DHT participants.
28#[derive(
29    Clone, PartialEq, Debug, Default, serde::Deserialize, serde::Serialize, SerializedBytes,
30)]
31#[repr(transparent)]
32pub struct Component(#[serde(with = "serde_bytes")] Vec<u8>);
33
34impl Component {
35    pub fn new(v: Vec<u8>) -> Self {
36        Self(v)
37    }
38}
39
40/// Wrap bytes.
41impl From<Vec<u8>> for Component {
42    fn from(v: Vec<u8>) -> Self {
43        Self(v)
44    }
45}
46
47/// Access bytes.
48impl AsRef<[u8]> for Component {
49    fn as_ref(&self) -> &[u8] {
50        self.0.as_ref()
51    }
52}
53
54/// Unwrap bytes.
55impl From<Component> for Vec<u8> {
56    fn from(component: Component) -> Self {
57        component.0
58    }
59}
60
61/// Build a component from a String.
62///
63/// For many simple use cases we can construct a path out of a string similar to a URI.
64/// We represent this using the utf32 bytes rather than the utf8 bytes for the chars in the string
65/// which gives us a fixed width encoding for strings, which gives us a clean/easy way to support
66/// sharding based on strings by iterating over u32s rather than deciding what to do with variable
67/// width u8 or u16 characters.
68///
69/// IMPORTANT: if you are not using sharding and make heavy use of [`Path`] then
70/// consider building your [`Component`] directly from `my_string.as_bytes()` to
71/// achieve much more compact utf8 representations of each [`Component`].
72impl From<&str> for Component {
73    fn from(s: &str) -> Self {
74        let bytes: Vec<u8> = s
75            .chars()
76            .flat_map(|c| (c as u32).to_le_bytes().to_vec())
77            .collect();
78        Self::from(bytes)
79    }
80}
81
82/// Alias From<&str>
83impl From<&String> for Component {
84    fn from(s: &String) -> Self {
85        Self::from(s.as_str())
86    }
87}
88
89/// Alias From<&str>
90impl From<String> for Component {
91    fn from(s: String) -> Self {
92        Self::from(s.as_str())
93    }
94}
95
96/// Restoring a [`String`] from a [`Component`] requires [`Vec<u8>`] to [`u32`] to utf8 handling.
97impl TryFrom<&Component> for String {
98    type Error = SerializedBytesError;
99    fn try_from(component: &Component) -> Result<Self, Self::Error> {
100        if component.as_ref().len() % 4 != 0 {
101            return Err(SerializedBytesError::Deserialize(format!(
102                "attempted to create u32s from utf8 bytes of length not a factor of 4: length {}",
103                component.as_ref().len()
104            )));
105        }
106        let (chars, _, error) = component
107            .as_ref()
108            .iter()
109            // @todo this algo seems a bit inefficient but also i'm not sure how much that
110            // matters in reality, maybe a premature optimisation to do anything else
111            .fold(
112                (vec![], vec![], None),
113                |(mut chars, mut build, mut error), b| {
114                    if error.is_none() {
115                        build.push(*b);
116                        if build.len() == std::mem::size_of::<u32>() {
117                            // Convert the build vector into 4 le_bytes for the u32.
118                            // This is an unwrap because we already check the total length above.
119                            let le_bytes = build[0..std::mem::size_of::<u32>()].try_into().unwrap();
120                            let u = u32::from_le_bytes(le_bytes);
121                            match std::char::from_u32(u) {
122                                Some(c) => {
123                                    chars.push(c);
124                                    build = vec![];
125                                }
126                                None => {
127                                    error = Some(Err(SerializedBytesError::Deserialize(format!(
128                                        "unknown char for u32: {}",
129                                        u
130                                    ))));
131                                }
132                            }
133                        }
134                    }
135                    (chars, build, error)
136                },
137            );
138        match error {
139            Some(error) => error,
140            None => Ok(chars.iter().collect::<String>()),
141        }
142    }
143}
144
145/// A [`Path`] is a vector of [`Component`]s.
146///
147/// It represents a single traversal of a tree structure down to some arbitrary point.
148/// The main intent is that we can recursively walk back up the tree, hashing, committing and
149/// linking each sub-path along the way until we reach the root.
150///
151/// At this point it is possible to follow DHT links from the root back up the path,
152/// i.e. the ahead-of-time predictability of the hashes of a given path allows us to travel "up"
153/// the tree and the linking functionality of the Holochain DHT allows us to travel "down" the tree
154/// after at least one DHT participant has followed the path "up".
155#[derive(
156    Clone, Debug, PartialEq, Default, serde::Deserialize, serde::Serialize, SerializedBytes,
157)]
158#[repr(transparent)]
159pub struct Path(pub Vec<Component>);
160
161#[derive(Clone, Debug, PartialEq, serde::Deserialize, serde::Serialize, SerializedBytes)]
162/// A [`LinkType`] applied to a [`Path`].
163///
164/// All links committed from this path will have this link type.
165///
166/// Get a typed path from a path and a link type:
167/// ```ignore
168/// let typed_path = path.typed(LinkTypes::MyLink)?;
169/// ```
170pub struct TypedPath {
171    /// The [`LinkType`] within the scope of the zome where it's defined.
172    pub link_type: ScopedLinkType,
173    /// The [`Path`] that is using this [`LinkType`].
174    pub path: Path,
175}
176
177/// Wrap components vector.
178impl From<Vec<Component>> for Path {
179    fn from(components: Vec<Component>) -> Self {
180        Self(components)
181    }
182}
183
184/// Unwrap components vector.
185impl From<Path> for Vec<Component> {
186    fn from(path: Path) -> Self {
187        path.0
188    }
189}
190
191/// Access components vector.
192impl AsRef<Vec<Component>> for Path {
193    fn as_ref(&self) -> &Vec<Component> {
194        self.0.as_ref()
195    }
196}
197
198/// Split a string path out into a vector of components.
199/// This allows us to construct pseudo-URI-path-things as strings.
200/// It is a simpler scheme than URLs and file paths.
201/// Leading and trailing slashes are ignored as are duplicate dots and the empty string leads
202/// to a path with zero length (no components).
203///
204/// e.g. all the following result in the same components as `vec!["foo", "bar"]` (as bytes)
205/// - foo.bar
206/// - foo.bar.
207/// - .foo.bar
208/// - .foo.bar.
209/// - foo..bar
210///
211/// There is no normalisation of paths, e.g. to guarantee a specific root component exists, at this
212/// layer so there is a risk that there are hash collisions with other data on the DHT network if
213/// some disambiguation logic is not included in higher level abstractions.
214///
215/// This supports sharding strategies from a small inline DSL.
216/// Start each component with `<width>:<depth>#` to get shards out of the string.
217///
218/// e.g.
219/// - foo.barbaz => normal path as above ["foo", "barbaz"]
220/// - foo.1:3#barbazii => width 1, depth 3, ["foo", "b", "a", "r", "barbazii"]
221/// - foo.2:3#barbazii => width 2, depth 3, ["foo", "ba", "rb", "az", "barbazii"]
222///
223/// Note that this all works because the components and sharding for strings maps to fixed-width
224/// utf32 bytes under the hood rather than variable width bytes.
225impl From<&str> for Path {
226    fn from(s: &str) -> Self {
227        Self(
228            s.split(DELIMITER)
229                .filter(|s| !s.is_empty())
230                .flat_map(|s| match ShardStrategy::from_str(s) {
231                    // Handle a strategy if one is found.
232                    Ok(strategy) => {
233                        let (_strategy, component) = s.split_at(s.find(SHARDEND).unwrap());
234                        let component = component.trim_start_matches(SHARDEND);
235                        let shard_path = Path::from((&strategy, component));
236                        let mut shard_components: Vec<Component> = shard_path.into();
237                        shard_components.push(Component::from(component));
238                        shard_components
239                    }
240                    // No strategy. Use the component directly.
241                    Err(_) => vec![Component::from(s)],
242                })
243                .collect(),
244        )
245    }
246}
247
248/// Alias From<&str>
249impl From<&String> for Path {
250    fn from(s: &String) -> Self {
251        Self::from(s.as_str())
252    }
253}
254
255/// Alias From<&str>
256impl From<String> for Path {
257    fn from(s: String) -> Self {
258        Self::from(s.as_str())
259    }
260}
261
262impl TryInto<String> for Path {
263    type Error = SerializedBytesError;
264    fn try_into(self) -> Result<String, Self::Error> {
265        let s = self
266            .as_ref()
267            .iter()
268            .map(String::try_from)
269            .collect::<Result<Vec<String>, Self::Error>>()?;
270
271        Ok(s.join(DELIMITER))
272    }
273}
274
275impl Path {
276    /// Attach a [`LinkType`] to this path so its type is known for test utility functions.
277    pub fn into_typed(self, link_type: impl Into<ScopedLinkType>) -> TypedPath {
278        TypedPath::new(link_type, self)
279    }
280
281    /// Try attaching a [`LinkType`] to this path so its type is known for test utility functions.
282    pub fn typed<TY, E>(self, link_type: TY) -> Result<TypedPath, WasmError>
283    where
284        ScopedLinkType: TryFrom<TY, Error = E>,
285        WasmError: From<E>,
286    {
287        Ok(TypedPath::new(ScopedLinkType::try_from(link_type)?, self))
288    }
289    /// What is the hash for the current [`Path`]?
290    pub fn path_entry_hash(&self) -> ExternResult<holo_hash::EntryHash> {
291        hash_entry(Entry::App(AppEntryBytes(
292            SerializedBytes::try_from(self).map_err(|e| wasm_error!(e))?,
293        )))
294    }
295
296    /// Mutate this [`Path`] into a child of itself by appending a [`Component`].
297    pub fn append_component(&mut self, component: Component) {
298        self.0.push(component);
299    }
300
301    /// Accessor for the last [`Component`] of this [`Path`].
302    /// This can be thought of as the leaf of the implied tree structure of
303    /// which this [`Path`] is one branch of.
304    pub fn leaf(&self) -> Option<&Component> {
305        self.0.last()
306    }
307
308    /// Make the [`LinkTag`] for this [`Path`].
309    pub fn make_tag(&self) -> ExternResult<LinkTag> {
310        Ok(LinkTag::new(match self.leaf() {
311            None => <Vec<u8>>::with_capacity(0),
312            Some(component) => {
313                UnsafeBytes::from(SerializedBytes::try_from(component).map_err(|e| wasm_error!(e))?)
314                    .into()
315            }
316        }))
317    }
318
319    /// Check if this [`Path`] is the root.
320    pub fn is_root(&self) -> bool {
321        self.0.len() == 1
322    }
323}
324
325impl TypedPath {
326    /// Create a new [`TypedPath`] by attaching a [`ZomeIndex`] and [`LinkType`] to a [`Path`].
327    pub fn new(link_type: impl Into<ScopedLinkType>, path: Path) -> Self {
328        Self {
329            link_type: link_type.into(),
330            path,
331        }
332    }
333
334    /// The parent of the current path is simply the path truncated one level.
335    pub fn parent(&self) -> Option<Self> {
336        if self.path.as_ref().len() > 1 {
337            let parent_vec: Vec<Component> =
338                self.path.as_ref()[0..self.path.as_ref().len() - 1].to_vec();
339            Some(Path::from(parent_vec).into_typed(self.link_type))
340        } else {
341            None
342        }
343    }
344}
345
346impl std::ops::Deref for TypedPath {
347    type Target = Path;
348
349    fn deref(&self) -> &Self::Target {
350        &self.path
351    }
352}
353
354impl From<TypedPath> for Path {
355    fn from(p: TypedPath) -> Self {
356        p.path
357    }
358}
359
360impl TryInto<String> for TypedPath {
361    type Error = SerializedBytesError;
362    fn try_into(self) -> Result<String, Self::Error> {
363        self.path.try_into()
364    }
365}
366
/// The delimiter is part of the string format of paths, so pin its value.
#[test]
#[cfg(test)]
fn hash_path_delimiter() {
    let expected = ".";
    assert_eq!(expected, DELIMITER);
}
372
/// Components wrap bytes untouched, encode strings as utf32 and decode them back,
/// reporting bad lengths and invalid chars as deserialization errors.
#[test]
#[cfg(test)]
fn hash_path_component() {
    use ::fixt::prelude::*;

    // Arbitrary bytes survive wrapping unchanged.
    let bytes: Vec<u8> = U8Fixturator::new(Unpredictable).take(5).collect();
    let component = Component::from(bytes.clone());
    assert_eq!(bytes, component.as_ref());

    // "foo" as little-endian utf32 bytes.
    let foo_utf32: Vec<u8> = vec![102, 0, 0, 0, 111, 0, 0, 0, 111, 0, 0, 0];

    // String -> component.
    assert_eq!(Component::from(foo_utf32.clone()), Component::from("foo"));

    // Component -> string.
    let restored = String::try_from(&Component::from(foo_utf32)).unwrap();
    assert_eq!(restored, String::from("foo"));

    // A byte count that is not a multiple of 4 is rejected.
    let bad_length = String::try_from(&Component::from(vec![1]));
    assert_eq!(
        bad_length,
        Err(SerializedBytesError::Deserialize(
            "attempted to create u32s from utf8 bytes of length not a factor of 4: length 1".into()
        )),
    );

    // A u32 that is not a valid char is rejected.
    let bad_char = String::try_from(&Component::from(vec![9, 9, 9, 9]));
    assert_eq!(
        bad_char,
        Err(SerializedBytesError::Deserialize(
            "unknown char for u32: 151587081".into()
        )),
    );
}
410
/// Paths wrap component vectors untouched, parse dotted strings (including the sharding
/// DSL) into the expected components, and render back to the dotted string.
#[test]
#[cfg(test)]
fn hash_path_path() {
    use ::fixt::prelude::*;

    // Ten components of ten arbitrary bytes each survive wrapping unchanged.
    let components: Vec<Component> = (0..10)
        .map(|_| {
            let bytes: Vec<u8> = U8Fixturator::new(Unpredictable).take(10).collect();
            Component::from(bytes)
        })
        .collect();

    assert_eq!(&components, Path::from(components.clone()).as_ref());

    // Shorthand for the expected components of each case.
    let named = |names: &[&str]| -> Vec<Component> {
        names.iter().map(|name| Component::from(*name)).collect()
    };

    let cases: Vec<(&str, Vec<Component>)> = vec![
        ("", named(&[])),
        (".", named(&[])),
        (".foo", named(&["foo"])),
        ("foo", named(&["foo"])),
        ("foo.", named(&["foo"])),
        (".foo.", named(&["foo"])),
        (".foo.bar", named(&["foo", "bar"])),
        (".foo.bar.", named(&["foo", "bar"])),
        ("foo.bar", named(&["foo", "bar"])),
        ("foo.bar.", named(&["foo", "bar"])),
        ("foo..bar", named(&["foo", "bar"])),
        ("foo.1:3#abcdef", named(&["foo", "a", "b", "c", "abcdef"])),
        (
            "foo.2:3#zzzzzzzzzz",
            named(&["foo", "zz", "zz", "zz", "zzzzzzzzzz"]),
        ),
        (
            "foo.1:3#abcdef.bar",
            named(&["foo", "a", "b", "c", "abcdef", "bar"]),
        ),
    ];

    for (input, output) in cases {
        assert_eq!(Path::from(input), Path::from(output));
    }

    // String -> path -> string round trip.
    let dotted = "foo.a.b.c.abcdef.bar";
    let round_tripped: String = Path::from(dotted).try_into().unwrap();
    assert_eq!(dotted.to_string(), round_tripped);
}