cdtoc/
shab64.rs

1/*!
2# CDTOC: Sha1/Base64
3*/
4
5use crate::TocError;
6use sha1::{
7	Digest,
8	Sha1,
9};
10use std::{
11	fmt,
12	str::FromStr,
13};
14
15
16
#[cfg_attr(docsrs, doc(cfg(feature = "sha1")))]
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
/// # Sha1/Base64.
///
/// A MusicBrainz/CTDB-style ID: the twenty raw bytes of a sha1 hash, which
/// are rendered as text using a nearly-standard base64 alphabet.
///
/// Only the binary hash is stored. The string form is produced on demand by
/// the [`fmt::Display`] impl, which keeps the type small and lets it be
/// `Copy`.
///
/// To go the other way — from an already-formatted string back to a
/// `ShaB64` — use [`ShaB64::decode`], or equivalently the `FromStr` or
/// `TryFrom<&str>` impls.
pub struct ShaB64([u8; 20]);
30
31impl fmt::Display for ShaB64 {
32	fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
33		// The output will always be 28-bytes, ending with a dash.
34		let mut buf = [b'-'; 28];
35
36		// For all but the last chunk, it's a simple 3:4 ratio.
37		for (raw, dst) in self.0.chunks_exact(3).zip(buf.chunks_exact_mut(4)) {
38			dst[0] = base64_encode(raw[0] >> 2);
39			dst[1] = base64_encode(((raw[0] & 0b0000_0011) << 4) | (raw[1] >> 4));
40			dst[2] = base64_encode(((raw[1] & 0b0000_1111) << 2) | (raw[2] >> 6));
41			dst[3] = base64_encode(raw[2] & 0b0011_1111);
42		}
43
44		// The last byte (27) is always padding, but the three before it still
45		// need figuring.
46		buf[24] = base64_encode(self.0[18] >> 2);
47		buf[25] = base64_encode(((self.0[18] & 0b0000_0011) << 4) | (self.0[19] >> 4));
48		buf[26] = base64_encode((self.0[19] & 0b0000_1111) << 2);
49
50		std::str::from_utf8(buf.as_slice())
51			.map_err(|_| fmt::Error)
52			.and_then(|s| <str as fmt::Display>::fmt(s, f))
53	}
54}
55
56impl From<Sha1> for ShaB64 {
57	#[inline]
58	fn from(src: Sha1) -> Self { Self(<[u8; 20]>::from(src.finalize())) }
59}
60
61impl FromStr for ShaB64 {
62	type Err = TocError;
63	#[inline]
64	fn from_str(src: &str) -> Result<Self, Self::Err> { Self::decode(src) }
65}
66
67impl TryFrom<&str> for ShaB64 {
68	type Error = TocError;
69	#[inline]
70	fn try_from(src: &str) -> Result<Self, Self::Error> { Self::decode(src) }
71}
72
73impl ShaB64 {
74	/// # Decode.
75	///
76	/// Convert a string ID back into a [`ShaB64`] instance.
77	///
78	/// ## Errors
79	///
80	/// This will return an error if decoding fails.
81	pub fn decode<S>(src: S) -> Result<Self, TocError>
82	where S: AsRef<str> {
83		let src = src.as_ref().as_bytes();
84		if src.len() == 28 && src[27] == b'-' {
85			let mut out = [0_u8; 20];
86
87			// Handle all the nice four-byte chunks en masse.
88			for (i, chunk) in out.chunks_exact_mut(3).zip(src.chunks_exact(4)) {
89				let a = base64_decode(chunk[0])?;
90				let b = base64_decode(chunk[1])?;
91				let c = base64_decode(chunk[2])?;
92				let d = base64_decode(chunk[3])?;
93				i.copy_from_slice(&[
94					((a & 0b0011_1111) << 2) | (b >> 4),
95					((b & 0b0000_1111) << 4) | (c >> 2),
96					((c & 0b0000_0011) << 6) | d & 0b0011_1111,
97				]);
98			}
99
100			// Handle the remainder manually.
101			let a = base64_decode(src[24])?;
102			let b = base64_decode(src[25])?;
103			let c = base64_decode(src[26])?;
104			out[18] = ((a & 0b0011_1111) << 2) | (b >> 4);
105			out[19] = ((b & 0b0000_1111) << 4) | (c >> 2);
106
107			// Done!
108			Ok(Self(out))
109		}
110		else { Err(TocError::ShaB64Decode) }
111	}
112
113	#[inline]
114	/// # Push to String.
115	///
116	/// Unpack and write `self` onto the end of a string without the use of
117	/// any intermediary buffers.
118	pub(crate) fn push_to_string(&self, out: &mut String) {
119		// For all but the last chunk, it's a simple 3:4 ratio.
120		for chunk in self.0.chunks_exact(3) {
121			out.push(base64_encode(chunk[0] >> 2) as char);
122			out.push(base64_encode(((chunk[0] & 0b0000_0011) << 4) | (chunk[1] >> 4)) as char);
123			out.push(base64_encode(((chunk[1] & 0b0000_1111) << 2) | (chunk[2] >> 6)) as char);
124			out.push(base64_encode(chunk[2] & 0b0011_1111) as char);
125		}
126
127		// The last byte (27) is always padding, but the two before it still
128		// need figuring.
129		out.push(base64_encode(self.0[18] >> 2) as char);
130		out.push(base64_encode(((self.0[18] & 0b0000_0011) << 4) | (self.0[19] >> 4)) as char);
131		out.push(base64_encode((self.0[19] & 0b0000_1111) << 2) as char);
132
133		// And add one byte for padding.
134		out.push('-');
135	}
136}
137
138
139
/// # Base64 Encode.
///
/// Map a 6-bit value (`0..=63`) to its ASCII symbol. The alphabet follows
/// the usual `A-Z`, `a-z`, `0-9` ordering, but finishes with `.` and `_`
/// where standard base64 would have `+` and `/`.
///
/// Values of 64 or more are a caller bug and will panic.
const fn base64_encode(byte: u8) -> u8 {
	debug_assert!(byte < 64, "BUG: base64 encoding byte is not 6-bit!");
	match byte {
		// Uppercase letters occupy the first 26 slots.
		0..=25 => b'A' + byte,
		// Lowercase letters occupy the next 26.
		26..=51 => b'a' + (byte - 26),
		// Then the ten digits.
		52..=61 => b'0' + (byte - 52),
		// And finally the two non-standard symbols.
		62 => b'.',
		63 => b'_',
		_ => unreachable!(), // We control the inputs.
	}
}
155
156/// # Base64 Decode.
157const fn base64_decode(byte: u8) -> Result<u8, TocError> {
158	match byte {
159		b'A'..=b'Z' => Ok(byte - 65),
160		b'a'..=b'z' => Ok(byte - 71),
161		b'0'..=b'9' => Ok(byte + 4),
162		b'.' => Ok(62),
163		b'_' => Ok(63),
164		_ => Err(TocError::ShaB64Decode),
165	}
166}