Skip to main content

aion_package/
hash.rs

1//! Content-hash computation over the canonical beam set.
2
3use std::{fmt, str::FromStr};
4
5use serde::{Deserialize, Deserializer, Serialize, Serializer, de};
6use sha2::{Digest, Sha256};
7
8use crate::BeamSet;
9
/// Size in bytes of a raw SHA-256 digest.
const DIGEST_LEN: usize = 32;
/// Length of the hexadecimal text form: two characters per digest byte.
const TEXT_LEN: usize = 2 * DIGEST_LEN;
12
13/// A SHA-256 content hash computed over the canonical BEAM module set only.
14///
15/// The digest is computed from each module's logical name and exact `.beam`
16/// bytes in [`BeamSet`] canonical order. Archive bytes, compression settings,
17/// timestamps, and optional source inclusion are never part of this hash, so the
18/// same compiled modules produce the same package version across deterministic
19/// and non-deterministic archive representations.
20///
21/// Its stable textual form is 64 lowercase hexadecimal characters. That text is
22/// the package version identifier stored in the manifest and the hash component
23/// embedded in namespaced deployed module names; it contains only `0-9a-f`,
24/// which is safe for a BEAM module-name component.
25#[derive(Clone, Debug, PartialEq, Eq, Hash)]
26pub struct ContentHash([u8; DIGEST_LEN]);
27
28impl ContentHash {
29    /// Creates a content hash from raw SHA-256 digest bytes.
30    #[must_use]
31    pub const fn from_bytes(bytes: [u8; DIGEST_LEN]) -> Self {
32        Self(bytes)
33    }
34
35    /// Returns the raw SHA-256 digest bytes.
36    #[must_use]
37    pub const fn as_bytes(&self) -> &[u8; DIGEST_LEN] {
38        &self.0
39    }
40}
41
42/// Errors produced when parsing a [`ContentHash`] textual form.
43#[derive(thiserror::Error, Clone, Debug, PartialEq, Eq)]
44pub enum ContentHashParseError {
45    /// The text was not exactly 64 ASCII hexadecimal characters.
46    #[error("content hash text must be 64 lowercase hexadecimal characters, found {found} bytes")]
47    InvalidLength {
48        /// Number of bytes found in the supplied text.
49        found: usize,
50    },
51
52    /// The text contained a character outside lowercase hexadecimal.
53    #[error("content hash text contains non-lowercase-hex byte 0x{byte:02x} at byte index {index}")]
54    InvalidCharacter {
55        /// Byte index of the invalid character.
56        index: usize,
57        /// Invalid byte found at `index`.
58        byte: u8,
59    },
60}
61
62/// Computes the package version hash over the canonical BEAM set only.
63///
64/// The SHA-256 algorithm is mandated by the `.aion` format contract so packers
65/// and loaders on different hosts agree. Each module contributes its logical
66/// name and exact bytes in [`BeamSet`] canonical order, with each field framed by
67/// an eight-byte big-endian length prefix. This unambiguous framing prevents a
68/// shifted name/body boundary from producing the same digest.
69#[must_use]
70pub fn content_hash(beams: &BeamSet) -> ContentHash {
71    let mut digest = Sha256::new();
72
73    for module in beams.iter() {
74        update_framed(&mut digest, module.name().as_bytes());
75        update_framed(&mut digest, module.bytes());
76    }
77
78    ContentHash(digest.finalize().into())
79}
80
81fn update_framed(digest: &mut Sha256, bytes: &[u8]) {
82    let length = bytes.len() as u64;
83    digest.update(length.to_be_bytes().as_slice());
84    digest.update(bytes);
85}
86
87impl fmt::Display for ContentHash {
88    fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
89        for byte in &self.0 {
90            write!(formatter, "{byte:02x}")?;
91        }
92
93        Ok(())
94    }
95}
96
97impl FromStr for ContentHash {
98    type Err = ContentHashParseError;
99
100    fn from_str(text: &str) -> Result<Self, Self::Err> {
101        let bytes = text.as_bytes();
102        if bytes.len() != TEXT_LEN {
103            return Err(ContentHashParseError::InvalidLength { found: bytes.len() });
104        }
105
106        let mut digest = [0_u8; DIGEST_LEN];
107        for (index, pair) in bytes.chunks_exact(2).enumerate() {
108            let high_index = index * 2;
109            let low_index = high_index + 1;
110            digest[index] = (hex_value(pair[0], high_index)? << 4) | hex_value(pair[1], low_index)?;
111        }
112
113        Ok(Self(digest))
114    }
115}
116
117fn hex_value(byte: u8, index: usize) -> Result<u8, ContentHashParseError> {
118    match byte {
119        b'0'..=b'9' => Ok(byte - b'0'),
120        b'a'..=b'f' => Ok(byte - b'a' + 10),
121        _ => Err(ContentHashParseError::InvalidCharacter { index, byte }),
122    }
123}
124
125impl Serialize for ContentHash {
126    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
127    where
128        S: Serializer,
129    {
130        serializer.serialize_str(&self.to_string())
131    }
132}
133
134impl<'de> Deserialize<'de> for ContentHash {
135    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
136    where
137        D: Deserializer<'de>,
138    {
139        deserializer.deserialize_str(ContentHashVisitor)
140    }
141}
142
143struct ContentHashVisitor;
144
145impl de::Visitor<'_> for ContentHashVisitor {
146    type Value = ContentHash;
147
148    fn expecting(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
149        formatter.write_str("a 64-character lowercase hexadecimal SHA-256 content hash")
150    }
151
152    fn visit_str<E>(self, value: &str) -> Result<Self::Value, E>
153    where
154        E: de::Error,
155    {
156        ContentHash::from_str(value).map_err(E::custom)
157    }
158}
159
#[cfg(test)]
mod tests {
    use super::{ContentHash, ContentHashParseError, content_hash};
    use crate::{BeamModule, BeamSet, PackageError};

    // --- Hash computation -------------------------------------------------

    #[test]
    fn content_hash_is_independent_of_insertion_order() -> Result<(), PackageError> {
        let first = BeamSet::new(vec![
            BeamModule::new("workflow/c", vec![3]),
            BeamModule::new("workflow/a", vec![1]),
            BeamModule::new("workflow/b", vec![2]),
        ])?;
        let second = BeamSet::new(vec![
            BeamModule::new("workflow/b", vec![2]),
            BeamModule::new("workflow/c", vec![3]),
            BeamModule::new("workflow/a", vec![1]),
        ])?;

        assert_eq!(content_hash(&first), content_hash(&second));

        Ok(())
    }

    #[test]
    fn content_hash_changes_when_a_module_byte_changes() -> Result<(), PackageError> {
        let original = BeamSet::new(vec![
            BeamModule::new("workflow/a", vec![1, 2, 3]),
            BeamModule::new("workflow/b", vec![4, 5, 6]),
        ])?;
        let changed = BeamSet::new(vec![
            BeamModule::new("workflow/a", vec![1, 2, 3]),
            BeamModule::new("workflow/b", vec![4, 5, 7]),
        ])?;

        assert_ne!(content_hash(&original), content_hash(&changed));

        Ok(())
    }

    #[test]
    fn content_hash_changes_when_a_module_name_changes() -> Result<(), PackageError> {
        let original = BeamSet::new(vec![BeamModule::new("workflow/a", vec![1, 2, 3])])?;
        let renamed = BeamSet::new(vec![BeamModule::new("workflow/renamed", vec![1, 2, 3])])?;

        assert_ne!(content_hash(&original), content_hash(&renamed));

        Ok(())
    }

    #[test]
    fn content_hash_framing_prevents_name_bytes_boundary_ambiguity() -> Result<(), PackageError> {
        let first = BeamSet::new(vec![BeamModule::new("ab", b"c".to_vec())])?;
        let second = BeamSet::new(vec![BeamModule::new("a", b"bc".to_vec())])?;

        assert_ne!(content_hash(&first), content_hash(&second));

        Ok(())
    }

    // --- Textual form -----------------------------------------------------

    #[test]
    fn content_hash_text_round_trips() -> Result<(), PackageError> {
        let beams = BeamSet::new(vec![BeamModule::new("workflow/a", vec![0, 1, 2, 255])])?;
        let hash = content_hash(&beams);
        let text = hash.to_string();
        let parsed = text.parse::<ContentHash>();

        assert_eq!(text.len(), 64);
        assert!(
            text.bytes()
                .all(|byte| matches!(byte, b'0'..=b'9' | b'a'..=b'f'))
        );
        assert_eq!(parsed, Ok(hash));

        Ok(())
    }

    #[test]
    fn content_hash_displays_known_bytes_as_lowercase_hex() {
        let raw = [0xab_u8; 32];
        let hash = ContentHash::from_bytes(raw);

        assert_eq!(hash.as_bytes(), &raw);
        assert_eq!(hash.to_string(), "ab".repeat(32));
    }

    #[test]
    fn content_hash_rejects_uppercase_text() {
        // Built programmatically so the input is exactly 64 bytes and the
        // rejection is provably caused by the uppercase digit, not the length.
        let text = format!("A{}", "0".repeat(63));

        assert_eq!(
            text.parse::<ContentHash>(),
            Err(ContentHashParseError::InvalidCharacter {
                index: 0,
                byte: b'A',
            })
        );
    }

    #[test]
    fn content_hash_reports_index_of_first_invalid_byte() {
        // Five valid digits, one invalid byte, then 58 valid digits: 64 bytes.
        let text = format!("{}g{}", "0".repeat(5), "0".repeat(58));

        assert_eq!(
            text.parse::<ContentHash>(),
            Err(ContentHashParseError::InvalidCharacter {
                index: 5,
                byte: b'g',
            })
        );
    }

    #[test]
    fn content_hash_rejects_non_ascii_text_of_correct_byte_length() {
        // U+00E9 encodes to two UTF-8 bytes (0xC3 0xA9), so this is 64 bytes.
        let text = format!("\u{e9}{}", "0".repeat(62));

        assert_eq!(text.len(), 64);
        assert_eq!(
            text.parse::<ContentHash>(),
            Err(ContentHashParseError::InvalidCharacter {
                index: 0,
                byte: 0xc3,
            })
        );
    }

    #[test]
    fn content_hash_rejects_text_of_wrong_length() {
        for found in [0_usize, 1, 63, 65] {
            let text = "0".repeat(found);

            assert_eq!(
                text.parse::<ContentHash>(),
                Err(ContentHashParseError::InvalidLength { found })
            );
        }
    }
}