forest/utils/encoding/
mod.rs

1// Copyright 2019-2025 ChainSafe Systems
2// SPDX-License-Identifier: Apache-2.0, MIT
3
4use crate::shim::address::Address;
5use blake2b_simd::Params;
6use fil_actors_shared::filecoin_proofs_api::ProverId;
7use fvm_ipld_encoding::strict_bytes::{Deserialize, Serialize};
8use serde::{Deserializer, Serializer, de, ser};
9
10mod fallback_de_ipld_dagcbor;
11
12/// This method will attempt to de-serialize given bytes using the regular
13/// `serde_ipld_dagcbor::from_slice`. Due to a historical issue in Lotus (see more in
14/// [FIP-0027](https://github.com/filecoin-project/FIPs/blob/master/FIPS/fip-0027.md), we must still
15/// support strings with invalid UTF-8 bytes. On a failure, it
16/// will retry the operation using the fallback that will de-serialize
17/// strings with invalid UTF-8 bytes as bytes.
18pub fn from_slice_with_fallback<'a, T: serde::de::Deserialize<'a>>(
19    bytes: &'a [u8],
20) -> anyhow::Result<T> {
21    match serde_ipld_dagcbor::from_slice(bytes) {
22        Ok(v) => Ok(v),
23        Err(err) => fallback_de_ipld_dagcbor::from_slice(bytes).map_err(|fallback_err| {
24            anyhow::anyhow!(
25                "Fallback deserialization failed: {fallback_err}. Original error: {err}"
26            )
27        }),
28    }
29}
30
31mod cid_de_cbor;
32pub use cid_de_cbor::extract_cids;
33
34/// `serde_bytes` with max length check
35pub mod serde_byte_array {
36    use super::*;
37    /// lotus use cbor-gen for generating codec for types, it has a length limit
38    /// for byte array as `2 << 20`
39    ///
40    /// <https://github.com/whyrusleeping/cbor-gen/blob/f57984553008dd4285df16d4ec2760f97977d713/gen.go#L16>
41    pub const BYTE_ARRAY_MAX_LEN: usize = 2 << 20;
42
43    /// checked if `input > crate::utils::BYTE_ARRAY_MAX_LEN`
44    pub fn serialize<T, S>(bytes: &T, serializer: S) -> Result<S::Ok, S::Error>
45    where
46        T: ?Sized + Serialize + AsRef<[u8]>,
47        S: Serializer,
48    {
49        let len = bytes.as_ref().len();
50        if len > BYTE_ARRAY_MAX_LEN {
51            return Err(ser::Error::custom::<String>(
52                "Array exceed max length".into(),
53            ));
54        }
55
56        Serialize::serialize(bytes, serializer)
57    }
58
59    /// checked if `output > crate::utils::ByteArrayMaxLen`
60    pub fn deserialize<'de, T, D>(deserializer: D) -> Result<T, D::Error>
61    where
62        T: Deserialize<'de> + AsRef<[u8]>,
63        D: Deserializer<'de>,
64    {
65        Deserialize::deserialize(deserializer).and_then(|bytes: T| {
66            if bytes.as_ref().len() > BYTE_ARRAY_MAX_LEN {
67                Err(de::Error::custom::<String>(
68                    "Array exceed max length".into(),
69                ))
70            } else {
71                Ok(bytes)
72            }
73        })
74    }
75}
76
77/// Generates BLAKE2b hash of fixed 32 bytes size.
78///
79/// # Example
80/// ```
81/// # use forest::doctest_private::blake2b_256;
82///
83/// let ingest: Vec<u8> = vec![];
84/// let hash = blake2b_256(&ingest);
85/// assert_eq!(hash.len(), 32);
86/// ```
87pub fn blake2b_256(ingest: &[u8]) -> [u8; 32] {
88    let digest = Params::new()
89        .hash_length(32)
90        .to_state()
91        .update(ingest)
92        .finalize();
93
94    let mut ret = [0u8; 32];
95    ret.clone_from_slice(digest.as_bytes());
96    ret
97}
98
99/// Generates Keccak-256 hash of fixed 32 bytes size.
100///
101/// # Example
102/// ```
103/// # use forest::doctest_private::keccak_256;
104/// let ingest: Vec<u8> = vec![];
105/// let hash = keccak_256(&ingest);
106/// assert_eq!(hash.len(), 32);
107/// ```
108pub fn keccak_256(ingest: &[u8]) -> [u8; 32] {
109    let mut ret: [u8; 32] = Default::default();
110    keccak_hash::keccak_256(ingest, &mut ret);
111    ret
112}
113
114pub fn prover_id_from_u64(id: u64) -> ProverId {
115    let mut prover_id = ProverId::default();
116    let prover_bytes = Address::new_id(id).payload().to_raw_bytes();
117    assert!(prover_bytes.len() <= prover_id.len());
118    #[allow(clippy::indexing_slicing)]
119    prover_id[..prover_bytes.len()].copy_from_slice(&prover_bytes);
120    prover_id
121}
122
#[cfg(test)]
mod tests {
    use ipld_core::ipld::Ipld;
    use itertools::Itertools as _;
    use rand::Rng;
    use serde::{Deserialize, Serialize};
    use serde_ipld_dagcbor::to_vec;

    use super::*;
    use crate::utils::encoding::serde_byte_array::BYTE_ARRAY_MAX_LEN;

    /// Hashing a `Vec` and an equal array literal must agree, and differing
    /// inputs must produce differing digests.
    #[test]
    fn vector_hashing() {
        let input = vec![1, 2, 3];
        let digest = blake2b_256(&input);

        assert_eq!(digest, blake2b_256(&[1, 2, 3]));
        assert_ne!(digest, blake2b_256(&[1, 2, 3, 4]));
    }

    /// Wrapper whose only field goes through the length-checked codec.
    #[derive(Debug, Serialize, Deserialize, PartialEq, Eq)]
    struct ByteArray {
        #[serde(with = "serde_byte_array")]
        pub inner: Vec<u8>,
    }

    /// Builds a [`ByteArray`] holding `len` zero bytes.
    fn zeroed(len: usize) -> ByteArray {
        ByteArray {
            inner: vec![0; len],
        }
    }

    /// Lengths up to and including the limit serialize successfully.
    #[test]
    fn can_serialize_byte_array() {
        for len in [0, 1, BYTE_ARRAY_MAX_LEN] {
            let value = zeroed(len);
            assert!(to_vec(&value).is_ok());
        }
    }

    /// One byte over the limit is rejected on serialization.
    #[test]
    fn cannot_serialize_byte_array_overflow() {
        let oversized = zeroed(BYTE_ARRAY_MAX_LEN + 1);

        let err = to_vec(&oversized).unwrap_err();
        let message = err.to_string();
        assert!(
            message.contains("Struct value cannot be serialized."),
            "{err}"
        );
    }

    /// Lengths up to and including the limit round-trip unchanged.
    #[test]
    fn can_deserialize_byte_array() {
        for len in [0, 1, BYTE_ARRAY_MAX_LEN] {
            let expected = zeroed(len);
            let encoded = to_vec(&expected).unwrap();

            let decoded: ByteArray = from_slice_with_fallback(&encoded).unwrap();
            assert_eq!(decoded, expected);
        }
    }

    /// An encoding that declares one byte more than the limit is rejected on
    /// deserialization.
    #[test]
    fn cannot_deserialize_byte_array_overflow() {
        let mut encoded = to_vec(&zeroed(BYTE_ARRAY_MAX_LEN)).unwrap();

        // Bump the byte right before the payload so the declared length goes
        // from 2 ^ 21 to 2 ^ 21 + 1, then append the extra payload byte.
        let length_byte = encoded.len() - BYTE_ARRAY_MAX_LEN - 1;
        encoded[length_byte] = 1;
        encoded.push(0);

        let err = from_slice_with_fallback::<ByteArray>(&encoded).unwrap_err();
        assert!(err.to_string().contains("Array exceed max length"));
    }

    /// `fvm_ipld_encoding::strict_bytes` and `cs_serde_bytes` must produce the
    /// same JSON for the same bytes.
    #[test]
    fn parity_tests() {
        use cs_serde_bytes;

        #[derive(Deserialize, Serialize)]
        struct A(#[serde(with = "fvm_ipld_encoding::strict_bytes")] Vec<u8>);

        #[derive(Deserialize, Serialize)]
        struct B(#[serde(with = "cs_serde_bytes")] Vec<u8>);

        let mut payload = [0; 1024];
        crate::utils::rand::forest_rng().fill(&mut payload);

        let strict_json = serde_json::to_string_pretty(&A(payload.to_vec())).unwrap();
        let cs_json = serde_json::to_string_pretty(&B(payload.to_vec())).unwrap();

        assert_eq!(strict_json, cs_json);
    }

    /// Where regular deserialization fails on invalid UTF-8 strings, the
    /// fallback should succeed.
    #[test]
    fn test_fallback_deserialization() {
        // Valid UTF-8: both decoders return the original value.
        let original = Ipld::String("cthulhu".to_string());
        let encoded = to_vec(&original).unwrap();

        let regular: Ipld = serde_ipld_dagcbor::from_slice(&encoded).unwrap();
        assert_eq!(original, regular);

        let with_fallback: Ipld = from_slice_with_fallback(&encoded).unwrap();
        assert_eq!(original, with_fallback);

        // Invalid UTF-8: swap the last two bytes of the string for bytes that
        // are not valid UTF-8. The fallback succeeds and exposes the raw bytes.
        let (intact, _) = encoded.split_at(encoded.len() - 2);
        let corrupted = intact.iter().copied().chain([0xa0, 0xa1]).collect_vec();

        match from_slice_with_fallback::<Ipld>(&corrupted).unwrap() {
            Ipld::Bytes(bytes) => {
                assert_eq!(bytes, [0x63, 0x74, 0x68, 0x75, 0x6c, 0xa0, 0xa1]);
            }
            other => panic!("expected `Ipld::Bytes`, got {other:?}"),
        }
    }
}