Skip to main content

forest/utils/encoding/
mod.rs

1// Copyright 2019-2026 ChainSafe Systems
2// SPDX-License-Identifier: Apache-2.0, MIT
3
4use crate::shim::address::Address;
5use blake2b_simd::Params;
6use fil_actors_shared::filecoin_proofs_api::ProverId;
7use fvm_ipld_encoding::strict_bytes::{Deserialize, Serialize};
8use serde::{Deserializer, Serializer, de, ser};
9
10mod fallback_de_ipld_dagcbor;
11
12/// Limit the the number of bytes that are used for pre-allocating `Vec<Cid>`s. This follows what `serde` is
13/// doing internally with `serde::private::size_hint::cautious()`.
14/// The limit is set to 1 MiB, which is a reasonable upper bound for most use cases.
15fn size_hint_cautious_cid(size_hint: usize) -> usize {
16    const MAX_PREALLOC_BYTES: usize = 1024 * 1024;
17    size_hint.min(MAX_PREALLOC_BYTES / std::mem::size_of::<cid::Cid>())
18}
19
20/// This method will attempt to de-serialize given bytes using the regular
21/// `serde_ipld_dagcbor::from_slice`. Due to a historical issue in Lotus (see more in
22/// [FIP-0027](https://github.com/filecoin-project/FIPs/blob/master/FIPS/fip-0027.md), we must still
23/// support strings with invalid UTF-8 bytes. On a failure, it
24/// will retry the operation using the fallback that will de-serialize
25/// strings with invalid UTF-8 bytes as bytes.
26pub fn from_slice_with_fallback<'a, T: serde::de::Deserialize<'a>>(
27    bytes: &'a [u8],
28) -> anyhow::Result<T> {
29    match serde_ipld_dagcbor::from_slice(bytes) {
30        Ok(v) => Ok(v),
31        Err(err) => fallback_de_ipld_dagcbor::from_slice(bytes).map_err(|fallback_err| {
32            anyhow::anyhow!(
33                "Fallback deserialization failed: {fallback_err}. Original error: {err}"
34            )
35        }),
36    }
37}
38
39mod cid_de_cbor;
40pub use cid_de_cbor::extract_cids;
41
42/// `serde_bytes` with max length check
43pub mod serde_byte_array {
44    use super::*;
45    /// lotus use cbor-gen for generating codec for types, it has a length limit
46    /// for byte array as `2 << 20`
47    ///
48    /// <https://github.com/whyrusleeping/cbor-gen/blob/f57984553008dd4285df16d4ec2760f97977d713/gen.go#L16>
49    pub const BYTE_ARRAY_MAX_LEN: usize = 2 << 20;
50
51    /// checked if `input > crate::utils::BYTE_ARRAY_MAX_LEN`
52    pub fn serialize<T, S>(bytes: &T, serializer: S) -> Result<S::Ok, S::Error>
53    where
54        T: ?Sized + Serialize + AsRef<[u8]>,
55        S: Serializer,
56    {
57        let len = bytes.as_ref().len();
58        if len > BYTE_ARRAY_MAX_LEN {
59            return Err(ser::Error::custom::<String>(
60                "Array exceed max length".into(),
61            ));
62        }
63
64        Serialize::serialize(bytes, serializer)
65    }
66
67    /// checked if `output > crate::utils::ByteArrayMaxLen`
68    pub fn deserialize<'de, T, D>(deserializer: D) -> Result<T, D::Error>
69    where
70        T: Deserialize<'de> + AsRef<[u8]>,
71        D: Deserializer<'de>,
72    {
73        Deserialize::deserialize(deserializer).and_then(|bytes: T| {
74            if bytes.as_ref().len() > BYTE_ARRAY_MAX_LEN {
75                Err(de::Error::custom::<String>(
76                    "Array exceed max length".into(),
77                ))
78            } else {
79                Ok(bytes)
80            }
81        })
82    }
83}
84
85/// Generates BLAKE2b hash of fixed 32 bytes size.
86///
87/// # Example
88/// ```
89/// # use forest::doctest_private::blake2b_256;
90///
91/// let ingest: Vec<u8> = vec![];
92/// let hash = blake2b_256(&ingest);
93/// assert_eq!(hash.len(), 32);
94/// ```
95pub fn blake2b_256(ingest: &[u8]) -> [u8; 32] {
96    let digest = Params::new()
97        .hash_length(32)
98        .to_state()
99        .update(ingest)
100        .finalize();
101
102    let mut ret = [0u8; 32];
103    ret.clone_from_slice(digest.as_bytes());
104    ret
105}
106
107/// Generates Keccak-256 hash of fixed 32 bytes size.
108///
109/// # Example
110/// ```
111/// # use forest::doctest_private::keccak_256;
112/// let ingest: Vec<u8> = vec![];
113/// let hash = keccak_256(&ingest);
114/// assert_eq!(hash.len(), 32);
115/// ```
116pub fn keccak_256(ingest: &[u8]) -> [u8; 32] {
117    let mut ret: [u8; 32] = Default::default();
118    keccak_hash::keccak_256(ingest, &mut ret);
119    ret
120}
121
122pub fn prover_id_from_u64(id: u64) -> ProverId {
123    let mut prover_id = ProverId::default();
124    let prover_bytes = Address::new_id(id).payload().to_raw_bytes();
125    assert!(prover_bytes.len() <= prover_id.len());
126    #[allow(clippy::indexing_slicing)]
127    prover_id[..prover_bytes.len()].copy_from_slice(&prover_bytes);
128    prover_id
129}
130
#[cfg(test)]
mod tests {
    use ipld_core::ipld::Ipld;
    use itertools::Itertools as _;
    use rand::Rng;
    use serde::{Deserialize, Serialize};
    use serde_ipld_dagcbor::to_vec;

    use super::*;
    use crate::utils::encoding::serde_byte_array::BYTE_ARRAY_MAX_LEN;

    /// Equal byte content hashes equally; different content hashes differently.
    #[test]
    fn vector_hashing() {
        let ing_vec = vec![1, 2, 3];
        let same_content_hash = blake2b_256(&[1, 2, 3]);
        let other_content_hash = blake2b_256(&[1, 2, 3, 4]);

        assert_eq!(blake2b_256(&ing_vec), same_content_hash);
        assert_ne!(blake2b_256(&ing_vec), other_content_hash);
    }

    /// Wrapper whose only field goes through the length-checked byte array codec.
    #[derive(Debug, Serialize, Deserialize, PartialEq, Eq)]
    struct ByteArray {
        #[serde(with = "serde_byte_array")]
        pub inner: Vec<u8>,
    }

    /// Lengths up to and including the limit serialize successfully.
    #[test]
    fn can_serialize_byte_array() {
        for len in [0, 1, BYTE_ARRAY_MAX_LEN] {
            let value = ByteArray {
                inner: vec![0; len],
            };

            assert!(to_vec(&value).is_ok());
        }
    }

    /// One byte over the limit is rejected on serialization.
    #[test]
    fn cannot_serialize_byte_array_overflow() {
        let oversized = ByteArray {
            inner: vec![0; BYTE_ARRAY_MAX_LEN + 1],
        };

        let err = to_vec(&oversized).unwrap_err();
        let message = err.to_string();
        assert!(
            message.contains("Struct value cannot be serialized."),
            "{err}"
        );
    }

    /// Lengths up to and including the limit round-trip through the codec.
    #[test]
    fn can_deserialize_byte_array() {
        for len in [0, 1, BYTE_ARRAY_MAX_LEN] {
            let bytes = ByteArray {
                inner: vec![0; len],
            };

            let encoding = to_vec(&bytes).unwrap();
            let decoded: ByteArray = from_slice_with_fallback(&encoding).unwrap();
            assert_eq!(decoded, bytes);
        }
    }

    /// An encoding that claims one byte more than the limit is rejected on deserialization.
    #[test]
    fn cannot_deserialize_byte_array_overflow() {
        let max_length_bytes = ByteArray {
            inner: vec![0; BYTE_ARRAY_MAX_LEN],
        };
        let mut overflow_encoding = to_vec(&max_length_bytes).unwrap();

        // Patch the byte right before the payload so that the encoded length prefix goes
        // from 2 ^ 21 to 2 ^ 21 + 1, then append one byte so the payload matches it.
        let prefix_last_byte = overflow_encoding.len() - BYTE_ARRAY_MAX_LEN - 1;
        overflow_encoding[prefix_last_byte] = 1;
        overflow_encoding.push(0);

        let err = from_slice_with_fallback::<ByteArray>(&overflow_encoding).unwrap_err();
        assert!(err.to_string().contains("Array exceed max length"));
    }

    /// `fvm_ipld_encoding::strict_bytes` and `cs_serde_bytes` produce the same JSON output.
    #[test]
    fn parity_tests() {
        use cs_serde_bytes;

        #[derive(Deserialize, Serialize)]
        struct A(#[serde(with = "fvm_ipld_encoding::strict_bytes")] Vec<u8>);

        #[derive(Deserialize, Serialize)]
        struct B(#[serde(with = "cs_serde_bytes")] Vec<u8>);

        let mut array = [0; 1024];
        crate::utils::rand::forest_rng().fill(&mut array);

        let strict_bytes_json = serde_json::to_string_pretty(&A(array.to_vec())).unwrap();
        let cs_serde_bytes_json = serde_json::to_string_pretty(&B(array.to_vec())).unwrap();

        assert_eq!(strict_bytes_json, cs_serde_bytes_json);
    }

    /// Where the regular deserialization fails with invalid UTF-8 strings, the fallback
    /// should succeed.
    #[test]
    fn test_fallback_deserialization() {
        // Valid UTF-8: both decoders should return the same result.
        let ipld_string = Ipld::String("cthulhu".to_string());
        let serialized = to_vec(&ipld_string).unwrap();

        let strict_decoded = serde_ipld_dagcbor::from_slice::<Ipld>(&serialized).unwrap();
        assert_eq!(ipld_string, strict_decoded);

        let fallback_decoded = from_slice_with_fallback::<Ipld>(&serialized).unwrap();
        assert_eq!(ipld_string, fallback_decoded);

        // Invalid UTF-8: the last two bytes of the string are replaced, so the regular
        // deserialization fails and the fallback succeeds. We can extract the bytes.
        let kept_len = serialized.len() - 2;
        let corrupted = serialized
            .iter()
            .take(kept_len)
            .chain(&[0xa0, 0xa1])
            .copied()
            .collect_vec();

        let decoded = from_slice_with_fallback::<Ipld>(&corrupted).unwrap();
        let expected_bytes: [u8; 7] = [0x63, 0x74, 0x68, 0x75, 0x6c, 0xa0, 0xa1];
        assert!(matches!(decoded, Ipld::Bytes(bytes) if bytes == expected_bytes));
    }

    /// A zero hint stays zero, and a 1 MiB hint is capped to the number of `Cid`s that fit
    /// into 1 MiB.
    #[test]
    fn size_hint_cautious_test() {
        const ONE_MIB: usize = 1024 * 1024;

        assert_eq!(size_hint_cautious_cid(0), 0);
        assert_eq!(
            size_hint_cautious_cid(ONE_MIB),
            ONE_MIB / std::mem::size_of::<cid::Cid>()
        );
    }
}