1use std::{fmt, str::FromStr};
4
5use serde::{Deserialize, Deserializer, Serialize, Serializer, de};
6use sha2::{Digest, Sha256};
7
8use crate::BeamSet;
9
/// Number of raw bytes in a SHA-256 digest.
const DIGEST_LEN: usize = 32;
/// Number of characters in the hexadecimal text form: two per digest byte.
const TEXT_LEN: usize = 2 * DIGEST_LEN;
12
13#[derive(Clone, Debug, PartialEq, Eq, Hash)]
26pub struct ContentHash([u8; DIGEST_LEN]);
27
28impl ContentHash {
29 #[must_use]
31 pub const fn from_bytes(bytes: [u8; DIGEST_LEN]) -> Self {
32 Self(bytes)
33 }
34
35 #[must_use]
37 pub const fn as_bytes(&self) -> &[u8; DIGEST_LEN] {
38 &self.0
39 }
40}
41
42#[derive(thiserror::Error, Clone, Debug, PartialEq, Eq)]
44pub enum ContentHashParseError {
45 #[error("content hash text must be 64 lowercase hexadecimal characters, found {found} bytes")]
47 InvalidLength {
48 found: usize,
50 },
51
52 #[error("content hash text contains non-lowercase-hex byte 0x{byte:02x} at byte index {index}")]
54 InvalidCharacter {
55 index: usize,
57 byte: u8,
59 },
60}
61
62#[must_use]
70pub fn content_hash(beams: &BeamSet) -> ContentHash {
71 let mut digest = Sha256::new();
72
73 for module in beams.iter() {
74 update_framed(&mut digest, module.name().as_bytes());
75 update_framed(&mut digest, module.bytes());
76 }
77
78 ContentHash(digest.finalize().into())
79}
80
81fn update_framed(digest: &mut Sha256, bytes: &[u8]) {
82 let length = bytes.len() as u64;
83 digest.update(length.to_be_bytes().as_slice());
84 digest.update(bytes);
85}
86
87impl fmt::Display for ContentHash {
88 fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
89 for byte in &self.0 {
90 write!(formatter, "{byte:02x}")?;
91 }
92
93 Ok(())
94 }
95}
96
97impl FromStr for ContentHash {
98 type Err = ContentHashParseError;
99
100 fn from_str(text: &str) -> Result<Self, Self::Err> {
101 let bytes = text.as_bytes();
102 if bytes.len() != TEXT_LEN {
103 return Err(ContentHashParseError::InvalidLength { found: bytes.len() });
104 }
105
106 let mut digest = [0_u8; DIGEST_LEN];
107 for (index, pair) in bytes.chunks_exact(2).enumerate() {
108 let high_index = index * 2;
109 let low_index = high_index + 1;
110 digest[index] = (hex_value(pair[0], high_index)? << 4) | hex_value(pair[1], low_index)?;
111 }
112
113 Ok(Self(digest))
114 }
115}
116
117fn hex_value(byte: u8, index: usize) -> Result<u8, ContentHashParseError> {
118 match byte {
119 b'0'..=b'9' => Ok(byte - b'0'),
120 b'a'..=b'f' => Ok(byte - b'a' + 10),
121 _ => Err(ContentHashParseError::InvalidCharacter { index, byte }),
122 }
123}
124
125impl Serialize for ContentHash {
126 fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
127 where
128 S: Serializer,
129 {
130 serializer.serialize_str(&self.to_string())
131 }
132}
133
134impl<'de> Deserialize<'de> for ContentHash {
135 fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
136 where
137 D: Deserializer<'de>,
138 {
139 deserializer.deserialize_str(ContentHashVisitor)
140 }
141}
142
143struct ContentHashVisitor;
144
145impl de::Visitor<'_> for ContentHashVisitor {
146 type Value = ContentHash;
147
148 fn expecting(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
149 formatter.write_str("a 64-character lowercase hexadecimal SHA-256 content hash")
150 }
151
152 fn visit_str<E>(self, value: &str) -> Result<Self::Value, E>
153 where
154 E: de::Error,
155 {
156 ContentHash::from_str(value).map_err(E::custom)
157 }
158}
159
#[cfg(test)]
mod tests {
    use super::{ContentHash, content_hash};
    use crate::{BeamModule, BeamSet, PackageError};

    /// The same modules supplied in a different order must hash identically.
    #[test]
    fn content_hash_is_independent_of_insertion_order() -> Result<(), PackageError> {
        let ordered_c_a_b = BeamSet::new(vec![
            BeamModule::new("workflow/c", vec![3]),
            BeamModule::new("workflow/a", vec![1]),
            BeamModule::new("workflow/b", vec![2]),
        ])?;
        let ordered_b_c_a = BeamSet::new(vec![
            BeamModule::new("workflow/b", vec![2]),
            BeamModule::new("workflow/c", vec![3]),
            BeamModule::new("workflow/a", vec![1]),
        ])?;

        let left = content_hash(&ordered_c_a_b);
        let right = content_hash(&ordered_b_c_a);
        assert_eq!(left, right);

        Ok(())
    }

    /// Flipping a single byte of one module must change the hash.
    #[test]
    fn content_hash_changes_when_a_module_byte_changes() -> Result<(), PackageError> {
        let before = BeamSet::new(vec![
            BeamModule::new("workflow/a", vec![1, 2, 3]),
            BeamModule::new("workflow/b", vec![4, 5, 6]),
        ])?;
        let after = BeamSet::new(vec![
            BeamModule::new("workflow/a", vec![1, 2, 3]),
            BeamModule::new("workflow/b", vec![4, 5, 7]),
        ])?;

        let hash_before = content_hash(&before);
        let hash_after = content_hash(&after);
        assert_ne!(hash_before, hash_after);

        Ok(())
    }

    /// Module names are part of the hashed content, not just the module bytes.
    #[test]
    fn content_hash_changes_when_a_module_name_changes() -> Result<(), PackageError> {
        let before = BeamSet::new(vec![BeamModule::new("workflow/a", vec![1, 2, 3])])?;
        let after = BeamSet::new(vec![BeamModule::new("workflow/renamed", vec![1, 2, 3])])?;

        let hash_before = content_hash(&before);
        let hash_after = content_hash(&after);
        assert_ne!(hash_before, hash_after);

        Ok(())
    }

    /// Moving a byte across the name/content boundary must not produce the same hash.
    #[test]
    fn content_hash_framing_prevents_name_bytes_boundary_ambiguity() -> Result<(), PackageError> {
        let longer_name = BeamSet::new(vec![BeamModule::new("ab", b"c".to_vec())])?;
        let longer_body = BeamSet::new(vec![BeamModule::new("a", b"bc".to_vec())])?;

        let left = content_hash(&longer_name);
        let right = content_hash(&longer_body);
        assert_ne!(left, right);

        Ok(())
    }

    /// `Display` emits 64 lowercase hex characters that `FromStr` parses back to the same hash.
    #[test]
    fn content_hash_text_round_trips() -> Result<(), PackageError> {
        let beams = BeamSet::new(vec![BeamModule::new("workflow/a", vec![0, 1, 2, 255])])?;
        let hash = content_hash(&beams);
        let text = hash.to_string();
        let parsed = text.parse::<ContentHash>();

        assert_eq!(text.len(), 64);
        let is_lower_hex = |byte: u8| byte.is_ascii_digit() || (b'a'..=b'f').contains(&byte);
        assert!(text.bytes().all(is_lower_hex));
        assert_eq!(parsed, Ok(hash));

        Ok(())
    }

    /// Uppercase hex digits are not accepted by the parser.
    #[test]
    fn content_hash_rejects_uppercase_text() {
        let text = "A000000000000000000000000000000000000000000000000000000000000000";
        let parsed = text.parse::<ContentHash>();

        assert!(parsed.is_err());
    }
}