cdtoc/
ctdb.rs

1/*!
2# CDTOC: CUETools Database
3*/
4
5use crate::{
6	hex,
7	ShaB64,
8	Toc,
9	TocError,
10	TocKind,
11};
12use dactyl::traits::HexToUnsigned;
13use std::collections::BTreeMap;
14
15
16
17impl Toc {
18	#[cfg_attr(docsrs, doc(cfg(feature = "ctdb")))]
19	#[must_use]
20	/// # CUETools Database ID.
21	///
22	/// This returns the [CUETools Database](http://cue.tools/wiki/CUETools_Database) ID
23	/// corresponding to the table of contents.
24	///
25	/// ## Examples
26	///
27	/// ```
28	/// use cdtoc::Toc;
29	///
30	/// let toc = Toc::from_cdtoc("4+96+2D2B+6256+B327+D84A").unwrap();
31	/// assert_eq!(
32	///     toc.ctdb_id().to_string(),
33	///     "VukMWWItblELRM.CEFpXxw0FlME-",
34	/// );
35	/// ```
36	pub fn ctdb_id(&self) -> ShaB64 {
37		use sha1::Digest;
38
39		// Split the leadin from the rest of the sectors.
40		let [leadin, sectors @ ..] = self.audio_sectors() else { unreachable!() };
41
42		// Start with a whole lotta ASCII zeroes.
43		let mut buf = crate::TRACK_ZEROES;
44
45		// Overwrite the first few entries with the audio and leadout sectors,
46		// relative to the leadin.
47		for (k, v) in sectors.iter().copied().chain(std::iter::once(self.audio_leadout())).enumerate() {
48			buf[k] = hex::upper_encode_u32(v - leadin);
49		}
50
51		// SHA and done!
52		let mut sha = sha1::Sha1::new();
53		sha.update(buf.as_flattened());
54		ShaB64::from(sha)
55	}
56
57	#[cfg_attr(docsrs, doc(cfg(feature = "ctdb")))]
58	#[must_use]
59	/// # CUETools Database URL.
60	///
61	/// This URL can be visited in a web browser to view the details for the
62	/// disc (if it is present in the database).
63	///
64	/// See also: [`Toc::ctdb_checksum_url`]
65	///
66	/// ## Examples
67	///
68	/// ```
69	/// use cdtoc::Toc;
70	///
71	/// let toc = Toc::from_cdtoc("4+96+2D2B+6256+B327+D84A").unwrap();
72	///
73	/// // Note: the CUETools website lacks SSL/TLS support.
74	/// assert_eq!(
75	///     toc.ctdb_url(),
76	///     "http://db.cuetools.net/?tocid=VukMWWItblELRM.CEFpXxw0FlME-",
77	/// );
78	/// ```
79	pub fn ctdb_url(&self) -> String {
80		let mut out = String::with_capacity(58);
81		out.push_str("http://db.cuetools.net/?tocid=");
82		self.ctdb_id().push_to_string(&mut out);
83		out
84	}
85
86	#[cfg_attr(docsrs, doc(cfg(feature = "ctdb")))]
87	#[must_use]
88	/// # CUETools Database Checksum URL.
89	///
90	/// This URL can be used to fetch XML-formatted checksums and metadata for
91	/// the disc (if it is present in the database).
92	///
93	/// See also: [`Toc::ctdb_url`]
94	///
95	/// ## Examples
96	///
97	/// ```
98	/// use cdtoc::Toc;
99	///
100	/// let toc = Toc::from_cdtoc("4+96+2D2B+6256+B327+D84A").unwrap();
101	///
102	/// // Note: the CUETools website lacks SSL/TLS support.
103	/// assert_eq!(
104	///     toc.ctdb_checksum_url(),
105	///     "http://db.cuetools.net/lookup2.php?version=3&ctdb=1&fuzzy=1&toc=0:11413:25024:45713:55220",
106	/// );
107	/// ```
108	pub fn ctdb_checksum_url(&self) -> String {
109		// We can't efficiently precalculate the exact size, but the next
110		// power-of-two isn't too far away, so we might as well start there.
111		let mut url = String::with_capacity(128);
112		url.push_str("http://db.cuetools.net/lookup2.php?version=3&ctdb=1&fuzzy=1&toc=");
113
114		// Digit buffer.
115		let mut buf = U32DigitBuffer::DEFAULT;
116
117		// Leading data?
118		if matches!(self.kind, TocKind::DataFirst) {
119			url.push('-');
120			for &c in buf.format(self.data - 150) { url.push(c); }
121			url.push(':');
122		}
123
124		// Each audio track relative to the first.
125		for v in &self.audio {
126			for &c in buf.format(v - 150) { url.push(c); }
127			url.push(':');
128		}
129
130		// Trailing data?
131		if matches!(self.kind, TocKind::CDExtra) {
132			url.push('-');
133			for &c in buf.format(self.data - 150) { url.push(c); }
134			url.push(':');
135		}
136
137		// And the leadout.
138		for &c in buf.format(self.leadout - 150) { url.push(c); }
139
140		url
141	}
142
143	#[cfg_attr(docsrs, doc(cfg(feature = "ctdb")))]
144	/// # Parse Checksums.
145	///
146	/// This will parse the track checksums from an XML CTDB [lookup](Toc::ctdb_checksum_url).
147	///
148	/// The return result is a vector — indexed by track number (`n-1`) — of
149	/// `checksum => confidence` pairs.
150	///
151	/// ## Errors
152	///
153	/// This method uses naive parsing so does not worry about strict XML
154	/// validation, but will return an error if other parsing errors are
155	/// encountered or no checksums are found.
156	pub fn ctdb_parse_checksums(&self, xml: &str) -> Result<Vec<BTreeMap<u32, u16>>, TocError> {
157		let audio_len = self.audio_len();
158		let mut out: Vec<BTreeMap<u32, u16>> = vec![BTreeMap::default(); audio_len];
159
160		for line in xml.lines() {
161			if let Some((confidence, crcs)) = parse_entry(line.trim()) {
162				let confidence: u16 = confidence.parse().map_err(|_| TocError::Checksums)?;
163				let mut id = 0;
164				for chk in crcs.split_ascii_whitespace() {
165					let crc = u32::htou(chk.as_bytes()).ok_or(TocError::Checksums)?;
166					if crc != 0 {
167						let e = out[id].entry(crc).or_insert(0);
168						*e = e.saturating_add(confidence);
169					}
170					id += 1;
171				}
172
173				if id != audio_len { return Err(TocError::Checksums); }
174			}
175		}
176
177		// Consider it okay if we found at least one checksum.
178		if out.iter().any(|v| ! v.is_empty()) { Ok(out) }
179		else { Err(TocError::NoChecksums) }
180	}
181}
182
183
184
185/// # Parse XML Entry.
186///
187/// This returns the value subslices corresponding to the "confidence" and
188/// "trackcrcs" attributes.
189fn parse_entry(line: &str) -> Option<(&str, &str)> {
190	if line.starts_with("<entry ") {
191		let confidence = parse_attr(line, " confidence=\"")?;
192		let crcs = parse_attr(line, " trackcrcs=\"")?;
193		Some((confidence, crcs))
194	}
195	else { None }
196}
197
/// # Parse Entry Value.
///
/// Naively extract a single attribute value from a tag.
///
/// `attr` is the complete opening marker — leading space, attribute name,
/// equal sign, and opening quote, e.g. ` confidence="` — and the value is
/// taken to be everything between its first occurrence and the next double
/// quote, with surrounding whitespace trimmed.
///
/// Returns `None` if the marker or closing quote is missing, or if there is
/// nothing at all between them.
///
/// Note: no unescaping is performed, so a value containing a quote or an
/// entity would not come through correctly. But that's okay; the values this
/// is used for (a confidence number and a list of CRCs) are not expected to
/// contain any.
fn parse_attr<'a>(line: &'a str, attr: &'static str) -> Option<&'a str> {
	// Everything after the opening marker…
	let (_, rest) = line.split_once(attr)?;

	// …up to (but not including) the closing quote.
	let (value, _) = rest.split_once('"')?;

	if value.is_empty() { None }
	else { Some(value.trim()) }
}
212
213
214
#[derive(Debug, Clone, Copy)]
/// # Digit Buffer.
///
/// A small reusable scratch space that `ctdb_checksum_url` uses to turn `u32`
/// values into decimal digits without going through the general-purpose
/// formatting machinery.
///
/// Ten slots are enough for any `u32`; `u32::MAX` has ten digits.
struct U32DigitBuffer([char; 10]);

impl U32DigitBuffer {
	/// # Default Buffer.
	///
	/// All slots start out as `'0'`.
	const DEFAULT: Self = Self(['0'; 10]);

	#[expect(clippy::cast_possible_truncation, reason = "False positive.")]
	/// # Digitize a Number.
	///
	/// Write the decimal digits of `num` into the tail end of the buffer and
	/// return the slice holding them, most significant digit first, as ASCII
	/// `char`s. There are no leading zeroes; zero itself yields a single `'0'`.
	const fn format(&mut self, mut num: u32) -> &[char] {
		// Digits are produced least-significant first, so the cursor starts
		// past the end and walks backward.
		let mut pos = self.0.len();

		// Peel off trailing digits while more than one remains.
		while 10 <= num {
			pos -= 1;
			self.0[pos] = (b'0' + (num % 10) as u8) as char;
			num /= 10;
		}

		// Whatever is left is a single digit (possibly zero).
		pos -= 1;
		self.0[pos] = (b'0' + (num as u8)) as char;

		// Hand back only the part that was just written.
		self.0.split_at(pos).1
	}
}
246
247
248
#[cfg(test)]
mod tests {
	use super::*;

	/// # Test Discs.
	///
	/// Each entry holds a CDTOC string, the CTDB ID expected for it, and the
	/// checksum lookup URL expected for it.
	const CASES: [(&str, &str, &str); 6] = [
		(
			"18+B6+3CE3+7C6F+B2BD+E47F+1121C+15865+175E0+1AED9+1E159+20BF9+235FC+259EF+2826E+29B62+2ED67+311B1+3396B+36ACB+3916B+3BB75+3D60A+40AA6+422FE+48B68+4E4CB",
			"sBOUSHYC0oLdQZtAEQcmnc3V3Ak-",
			"http://db.cuetools.net/lookup2.php?version=3&ctdb=1&fuzzy=1&toc=32:15437:31705:45607:58345:70022:88015:95562:110147:123075:133987:144742:153945:164312:170700:191697:200987:211157:223797:233685:244447:251252:264720:270952:-297682:320565",
		),
		(
			"D+96+3B5D+78E3+B441+EC83+134F4+17225+1A801+1EA5C+23B5B+27CEF+2B58B+2F974+35D56+514C8",
			"gmEsiU5wvQFA1Nq9YE_posiwgK8-",
			"http://db.cuetools.net/lookup2.php?version=3&ctdb=1&fuzzy=1&toc=0:15047:30797:45995:60397:78942:94607:108395:125382:146117:162905:177397:194782:-220352:332850",
		),
		(
			"4+96+2D2B+6256+B327+D84A",
			"VukMWWItblELRM.CEFpXxw0FlME-",
			"http://db.cuetools.net/lookup2.php?version=3&ctdb=1&fuzzy=1&toc=0:11413:25024:45713:55220",
		),
		(
			"10+B6+5352+62AC+99D6+E218+12AC0+135E7+142E9+178B0+19D22+1B0D0+1E7FA+22882+247DB+27074+2A1BD+2C0FB",
			"iL4EZ56YD5WmG..M4v5qzPG0cFY-",
			"http://db.cuetools.net/lookup2.php?version=3&ctdb=1&fuzzy=1&toc=32:21180:25110:39232:57730:76330:79185:82515:96282:105612:110650:124772:141292:149317:159710:172327:180325",
		),
		(
			"15+247E+2BEC+4AF4+7368+9704+B794+E271+110D0+12B7A+145C1+16CAF+195CF+1B40F+1F04A+21380+2362D+2589D+2793D+2A760+2DA32+300E1+32B46",
			"8geCxI4CSyw_ydvHWGmPQUGF1UE-",
			"http://db.cuetools.net/lookup2.php?version=3&ctdb=1&fuzzy=1&toc=9192:11094:19038:29394:38510:46846:57819:69690:76516:83243:93209:103737:111481:126900:135914:144791:153607:161959:173770:186780:196683:207536",
		),
		(
			"63+96+12D9+5546+A8A2+CAAA+128BF+17194+171DF+1722A+17275+172C0+1730B+17356+173A1+173EC+17437+17482+174CD+17518+17563+175AE+175F9+17644+1768F+176DA+17725+17770+177BB+17806+17851+1789C+178E7+17932+1797D+179C8+17A13+17A5E+17AA9+17AF4+17B3F+17B8A+17BD5+17C20+17C6B+17CB6+17D01+17D4C+17D97+17DE2+17E2D+17E78+17EC3+17F0E+17F59+17FA4+17FEF+1803A+18085+180D0+1811B+18166+181B1+181FC+18247+18292+182DD+18328+18373+183BE+18409+18454+1849F+184EA+18535+18580+185CB+18616+18661+186AC+186F7+18742+1878D+187D8+18823+1886E+188B9+18904+1894F+1899A+189E5+18A30+18A7B+18AC6+18B11+18B5C+18BA7+18BF2+18C38+1ECDC+246E9",
			"okpTZ4Yt2noZkGqbBLte3FfkyVs-",
			"http://db.cuetools.net/lookup2.php?version=3&ctdb=1&fuzzy=1&toc=0:4675:21680:43020:51732:75817:94462:94537:94612:94687:94762:94837:94912:94987:95062:95137:95212:95287:95362:95437:95512:95587:95662:95737:95812:95887:95962:96037:96112:96187:96262:96337:96412:96487:96562:96637:96712:96787:96862:96937:97012:97087:97162:97237:97312:97387:97462:97537:97612:97687:97762:97837:97912:97987:98062:98137:98212:98287:98362:98437:98512:98587:98662:98737:98812:98887:98962:99037:99112:99187:99262:99337:99412:99487:99562:99637:99712:99787:99862:99937:100012:100087:100162:100237:100312:100387:100462:100537:100612:100687:100762:100837:100912:100987:101062:101137:101212:101282:126022:149075",
		),
	];

	#[test]
	/// # ID and Lookup URL Generation.
	fn t_ctdb() {
		for (cdtoc, expected_id, expected_url) in CASES {
			let toc = Toc::from_cdtoc(cdtoc).expect("Invalid TOC");

			// Generated values should match the fixtures.
			let generated = toc.ctdb_id();
			assert_eq!(generated.to_string(), expected_id);
			assert_eq!(toc.ctdb_checksum_url(), expected_url);

			// The ID string should also decode back to the same value, no
			// matter which of the three entry points is used.
			assert_eq!(ShaB64::decode(expected_id), Ok(generated));
			assert_eq!(ShaB64::try_from(expected_id), Ok(generated));
			assert_eq!(expected_id.parse::<ShaB64>(), Ok(generated));
		}
	}

	#[test]
	/// # Digit Buffer Formatting.
	fn t_digits() {
		// A single buffer is reused throughout to make sure earlier (longer)
		// values don't bleed into later (shorter) ones.
		let mut buf = U32DigitBuffer::DEFAULT;
		for (num, expected) in [
			(0_u32, "0"),
			(10, "10"),
			(432, "432"),
			(50_000, "50000"),
			(u32::MAX, "4294967295"),
		] {
			let digits: String = buf.format(num).iter().collect();
			assert_eq!(digits, expected);
		}
	}
}