1use alloc::borrow::Cow;
37use alloc::string::String;
38use alloc::vec::Vec;
39
40#[must_use]
44pub fn decode_dvb_string(bytes: &[u8]) -> String {
45 if bytes.is_empty() {
46 return String::new();
47 }
48
49 let (charset, body) = split_charset(bytes);
50 let decoded = match charset {
51 Charset::Iso6937 => decode_iso_6937(body),
52 Charset::Iso8859(n) => decode_iso_8859(n, body),
53 Charset::Utf8 => String::from_utf8_lossy(body).into_owned(),
54 Charset::Ucs2Be => decode_ucs2_be(body),
55 #[cfg(feature = "std")]
56 Charset::Ksx1001 => decode_with(encoding_rs::EUC_KR, body),
57 #[cfg(feature = "std")]
58 Charset::Gb2312 => decode_with(encoding_rs::GBK, body),
59 #[cfg(feature = "std")]
60 Charset::Big5 => decode_with(encoding_rs::BIG5, body),
61 #[cfg(not(feature = "std"))]
65 Charset::Ksx1001 | Charset::Gb2312 | Charset::Big5 => {
66 body.iter().map(|_| '\u{FFFD}').collect()
67 }
68 Charset::Unsupported(_indicator) => body.iter().map(|_| '\u{FFFD}').collect(),
69 };
70
71 decoded
78 .chars()
79 .filter_map(|c| match c as u32 {
80 0x86 | 0x87 | 0xE086 | 0xE087 => None,
81 0x8A | 0xE08A => Some(' '),
82 0x0A => Some(' '),
83 code if code < 0x20 => None,
84 code if (0x80..0xA0).contains(&code) => None,
85 code if (0xE080..0xE0A0).contains(&code) => None,
86 _ => Some(c),
87 })
88 .collect()
89}
90
91#[must_use]
94pub fn decode(bytes: &[u8]) -> Cow<'_, str> {
95 if bytes.iter().all(|&b| b.is_ascii() && b >= 0x20) {
96 return Cow::Borrowed(core::str::from_utf8(bytes).unwrap_or(""));
97 }
98 Cow::Owned(decode_dvb_string(bytes))
99}
100
/// Borrowed, still-encoded DVB text field.
///
/// Wraps the raw bytes exactly as given; nothing is decoded or validated until
/// `decode`, `Display` or `Debug` is used. The derived equality and hashing
/// operate on the raw bytes, not on the decoded text.
#[derive(Clone, Copy, PartialEq, Eq, Hash)]
#[cfg_attr(feature = "yoke", derive(yoke::Yokeable))]
pub struct DvbText<'a>(&'a [u8]);
107
108impl<'a> DvbText<'a> {
109 #[must_use]
111 pub const fn new(raw: &'a [u8]) -> Self {
112 Self(raw)
113 }
114 #[must_use]
116 pub const fn raw(&self) -> &'a [u8] {
117 self.0
118 }
119 #[must_use]
123 pub fn decode(&self) -> Cow<'a, str> {
124 decode(self.0)
125 }
126}
127
128impl core::ops::Deref for DvbText<'_> {
129 type Target = [u8];
132 fn deref(&self) -> &[u8] {
133 self.0
134 }
135}
136
137impl core::fmt::Display for DvbText<'_> {
138 fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
139 f.write_str(&self.decode())
140 }
141}
142
143impl core::fmt::Debug for DvbText<'_> {
144 fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
145 write!(f, "DvbText({:?})", self.decode())
146 }
147}
148
149impl<'a> From<&'a [u8]> for DvbText<'a> {
150 fn from(raw: &'a [u8]) -> Self {
151 Self(raw)
152 }
153}
154
/// Serializes as the decoded text (a string), not as the raw bytes.
#[cfg(feature = "serde")]
impl serde::Serialize for DvbText<'_> {
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: serde::Serializer,
    {
        let text = self.decode();
        serializer.serialize_str(&text)
    }
}
/// Three raw bytes of a language code (for example `*b"fre"`).
///
/// The bytes are stored as given and are not validated; see `as_str` for how
/// non-UTF-8 bytes are rendered.
#[derive(Clone, Copy, PartialEq, Eq, Hash)]
pub struct LangCode(pub [u8; 3]);
167
168impl LangCode {
169 #[must_use]
171 pub fn as_str(&self) -> Cow<'_, str> {
172 String::from_utf8_lossy(&self.0)
173 }
174}
175
176impl core::ops::Deref for LangCode {
177 type Target = [u8; 3];
178 fn deref(&self) -> &[u8; 3] {
179 &self.0
180 }
181}
182
183impl core::fmt::Display for LangCode {
184 fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
185 f.write_str(&self.as_str())
186 }
187}
188
189impl core::fmt::Debug for LangCode {
190 fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
191 write!(f, "LangCode({})", self.as_str())
192 }
193}
194
/// Serializes as the string produced by [`LangCode::as_str`].
#[cfg(feature = "serde")]
impl serde::Serialize for LangCode {
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: serde::Serializer,
    {
        let code = self.as_str();
        serializer.serialize_str(&code)
    }
}
201
/// Character table selected by the leading byte(s) of a DVB text field,
/// as classified by `split_charset`.
#[derive(Debug)]
enum Charset {
    /// Default table: used when there is no selector byte (first byte >= 0x20)
    /// or when the selector is `0x00`.
    Iso6937,
    /// ISO 8859 part `n`. Selectors `0x01..=0x0B` (except `0x08`) map to part
    /// `selector + 4`; the `0x10 0x00 n` form carries the part number directly.
    Iso8859(u8),
    /// UTF-8 (selector `0x15`).
    Utf8,
    /// Big-endian 16-bit code units (selector `0x11`).
    Ucs2Be,
    /// Selector `0x12`; decoded as EUC-KR when the `std` feature is enabled.
    Ksx1001,
    /// Selector `0x13`; decoded as GBK when the `std` feature is enabled.
    Gb2312,
    /// Selector `0x14`; decoded as Big5 when the `std` feature is enabled.
    Big5,
    /// Any selector this module cannot decode; carries the selector byte.
    Unsupported(u8),
}
216
217fn split_charset(bytes: &[u8]) -> (Charset, &[u8]) {
218 match bytes[0] {
219 b if b >= 0x20 => (Charset::Iso6937, bytes),
220 0x00 => (Charset::Iso6937, &bytes[1..]),
221 0x08 => (Charset::Unsupported(0x08), &bytes[1..]),
224 0x01..=0x0B => (Charset::Iso8859(bytes[0] + 4), &bytes[1..]),
225 0x10 if bytes.len() >= 3 && bytes[1] == 0x00 => (Charset::Iso8859(bytes[2]), &bytes[3..]),
226 0x11 => (Charset::Ucs2Be, &bytes[1..]),
227 0x12 => (Charset::Ksx1001, &bytes[1..]),
228 0x13 => (Charset::Gb2312, &bytes[1..]),
229 0x14 => (Charset::Big5, &bytes[1..]),
230 0x15 => (Charset::Utf8, &bytes[1..]),
231 0x1F if bytes.len() >= 2 => (Charset::Unsupported(0x1F), &bytes[2..]),
234 other => (Charset::Unsupported(other), &bytes[1..]),
235 }
236}
237
238fn decode_iso_6937(bytes: &[u8]) -> String {
239 let mut out = String::with_capacity(bytes.len());
240 let mut i = 0;
241 while i < bytes.len() {
242 let b = bytes[i];
243 if (0xC0..=0xCF).contains(&b) {
245 match combining_mark(b) {
246 Some(mark) if i + 1 < bytes.len() => {
247 let base = bytes[i + 1];
248 if let Some(c) = combine(b, base) {
249 out.push(c);
250 } else {
251 out.push(iso_6937_single(base));
254 out.push(mark);
255 }
256 i += 2;
257 }
258 _ => {
260 out.push('\u{FFFD}');
261 i += 1;
262 }
263 }
264 continue;
265 }
266 out.push(iso_6937_single(b));
267 i += 1;
268 }
269 out
270}
271
/// Maps one byte of the default table to a character.
///
/// * `0x00..=0x7F` map to the same code point.
/// * `0x86`, `0x87` and `0x8A` are passed through as `U+0086`, `U+0087` and
///   `U+008A`; every other byte in `0x80..=0x9F` is `U+FFFD`.
/// * `0xC0..=0xCF` (the diacritic prefixes) are `U+FFFD` here.
/// * Positions with no assignment (`0xA6`, `0xD8..=0xDB`, `0xE5`) are `U+FFFD`.
fn iso_6937_single(byte: u8) -> char {
    const UNDEFINED: char = '\u{FFFD}';

    match byte {
        0x00..=0x7F | 0x86 | 0x87 | 0x8A => char::from(byte),
        0x80..=0x9F => UNDEFINED,

        // Row 0xA_
        0xA0 => '\u{00A0}',
        0xA1 => '¡',
        0xA2 => '¢',
        0xA3 => '£',
        0xA4 => '\u{20AC}',
        0xA5 => '¥',
        0xA6 => UNDEFINED,
        0xA7 => '§',
        0xA8 => '\u{00A4}',
        0xA9 => '\u{2018}',
        0xAA => '\u{201C}',
        0xAB => '«',
        0xAC => '\u{2190}',
        0xAD => '\u{2191}',
        0xAE => '\u{2192}',
        0xAF => '\u{2193}',

        // Row 0xB_
        0xB0 => '°',
        0xB1 => '±',
        0xB2 => '²',
        0xB3 => '³',
        0xB4 => '\u{00D7}',
        0xB5 => 'µ',
        0xB6 => '¶',
        0xB7 => '·',
        0xB8 => '\u{00F7}',
        0xB9 => '\u{2019}',
        0xBA => '\u{201D}',
        0xBB => '»',
        0xBC => '¼',
        0xBD => '½',
        0xBE => '¾',
        0xBF => '¿',

        // Row 0xC_: diacritic prefixes, never valid as a single character.
        0xC0..=0xCF => UNDEFINED,

        // Row 0xD_
        0xD0 => '\u{2015}',
        0xD1 => '¹',
        0xD2 => '®',
        0xD3 => '©',
        0xD4 => '\u{2122}',
        0xD5 => '\u{266A}',
        0xD6 => '¬',
        0xD7 => '\u{00A6}',
        0xD8..=0xDB => UNDEFINED,
        0xDC => '\u{215B}',
        0xDD => '\u{215C}',
        0xDE => '\u{215D}',
        0xDF => '\u{215E}',

        // Row 0xE_
        0xE0 => '\u{2126}',
        0xE1 => 'Æ',
        0xE2 => '\u{0110}',
        0xE3 => 'ª',
        0xE4 => '\u{0126}',
        0xE5 => UNDEFINED,
        0xE6 => '\u{0132}',
        0xE7 => '\u{013F}',
        0xE8 => '\u{0141}',
        0xE9 => 'Ø',
        0xEA => '\u{0152}',
        0xEB => 'º',
        0xEC => 'Þ',
        0xED => '\u{0166}',
        0xEE => '\u{014A}',
        0xEF => '\u{0149}',

        // Row 0xF_
        0xF0 => '\u{0138}',
        0xF1 => 'æ',
        0xF2 => '\u{0111}',
        0xF3 => 'ð',
        0xF4 => '\u{0127}',
        0xF5 => '\u{0131}',
        0xF6 => '\u{0133}',
        0xF7 => '\u{0140}',
        0xF8 => '\u{0142}',
        0xF9 => 'ø',
        0xFA => '\u{0153}',
        0xFB => 'ß',
        0xFC => '\u{00FE}',
        0xFD => '\u{0167}',
        0xFE => '\u{014B}',
        0xFF => '\u{00AD}',
    }
}
366
/// Returns the Unicode combining mark for a diacritic prefix byte.
///
/// Yields `None` for any byte that has no mark in this table, including the
/// prefix positions `0xC0`, `0xC9` and `0xCC`.
fn combining_mark(prefix: u8) -> Option<char> {
    match prefix {
        0xC1 => Some('\u{0300}'), // grave
        0xC2 => Some('\u{0301}'), // acute
        0xC3 => Some('\u{0302}'), // circumflex
        0xC4 => Some('\u{0303}'), // tilde
        0xC5 => Some('\u{0304}'), // macron
        0xC6 => Some('\u{0306}'), // breve
        0xC7 => Some('\u{0307}'), // dot above
        0xC8 => Some('\u{0308}'), // diaeresis
        0xCA => Some('\u{030A}'), // ring above
        0xCB => Some('\u{0327}'), // cedilla
        0xCD => Some('\u{030B}'), // double acute
        0xCE => Some('\u{0328}'), // ogonek
        0xCF => Some('\u{030C}'), // caron
        _ => None,
    }
}
387
/// Returns the precomposed character for a diacritic prefix and a base letter.
///
/// `prefix` is a diacritic prefix byte (`0xC1..=0xCF`) and `base` is the ASCII
/// letter that follows it. Returns `None` when the pair has no entry here; the
/// caller then falls back to the base character plus a combining mark.
///
/// Each row lists the base letters for one diacritic next to their composed
/// forms, position by position. The circumflex row includes C, G, H, J, S, W
/// and Y in both cases so those letters come out precomposed like the rest of
/// the repertoire rather than as base + combining mark.
fn combine(prefix: u8, base: u8) -> Option<char> {
    let (bases, composed): (&[u8], &[char]) = match prefix {
        // Grave
        0xC1 => (
            b"AEIOUaeiou",
            &['À', 'È', 'Ì', 'Ò', 'Ù', 'à', 'è', 'ì', 'ò', 'ù'],
        ),
        // Acute
        0xC2 => (
            b"ACEILNORSUYZaceilnorsuyz",
            &[
                'Á', 'Ć', 'É', 'Í', 'Ĺ', 'Ń', 'Ó', 'Ŕ', 'Ś', 'Ú', 'Ý', 'Ź', 'á', 'ć', 'é', 'í',
                'ĺ', 'ń', 'ó', 'ŕ', 'ś', 'ú', 'ý', 'ź',
            ],
        ),
        // Circumflex
        0xC3 => (
            b"ACEGHIJOSUWYaceghijosuwy",
            &[
                'Â', 'Ĉ', 'Ê', 'Ĝ', 'Ĥ', 'Î', 'Ĵ', 'Ô', 'Ŝ', 'Û', 'Ŵ', 'Ŷ', 'â', 'ĉ', 'ê', 'ĝ',
                'ĥ', 'î', 'ĵ', 'ô', 'ŝ', 'û', 'ŵ', 'ŷ',
            ],
        ),
        // Tilde
        0xC4 => (
            b"AINOUainou",
            &['Ã', 'Ĩ', 'Ñ', 'Õ', 'Ũ', 'ã', 'ĩ', 'ñ', 'õ', 'ũ'],
        ),
        // Macron
        0xC5 => (
            b"AEIOUaeiou",
            &['Ā', 'Ē', 'Ī', 'Ō', 'Ū', 'ā', 'ē', 'ī', 'ō', 'ū'],
        ),
        // Breve
        0xC6 => (b"AGUagu", &['Ă', 'Ğ', 'Ŭ', 'ă', 'ğ', 'ŭ']),
        // Dot above (there is no lowercase `i` entry)
        0xC7 => (
            b"CEGIZcegz",
            &['Ċ', 'Ė', 'Ġ', 'İ', 'Ż', 'ċ', 'ė', 'ġ', 'ż'],
        ),
        // Diaeresis
        0xC8 => (
            b"AEIOUYaeiouy",
            &['Ä', 'Ë', 'Ï', 'Ö', 'Ü', 'Ÿ', 'ä', 'ë', 'ï', 'ö', 'ü', 'ÿ'],
        ),
        // Ring above
        0xCA => (b"AUau", &['Å', 'Ů', 'å', 'ů']),
        // Cedilla
        0xCB => (
            b"CGKLNRSTcgklnrst",
            &[
                'Ç', 'Ģ', 'Ķ', 'Ļ', 'Ņ', 'Ŗ', 'Ş', 'Ţ', 'ç', 'ģ', 'ķ', 'ļ', 'ņ', 'ŗ', 'ş', 'ţ',
            ],
        ),
        // Double acute
        0xCD => (b"OUou", &['Ő', 'Ű', 'ő', 'ű']),
        // Ogonek
        0xCE => (b"AEIUaeiu", &['Ą', 'Ę', 'Į', 'Ų', 'ą', 'ę', 'į', 'ų']),
        // Caron
        0xCF => (
            b"CDELNRSTZcdelnrstz",
            &[
                'Č', 'Ď', 'Ě', 'Ľ', 'Ň', 'Ř', 'Š', 'Ť', 'Ž', 'č', 'ď', 'ě', 'ľ', 'ň', 'ř', 'š',
                'ť', 'ž',
            ],
        ),
        _ => return None,
    };

    // The two halves of a row must stay the same length so positions line up.
    debug_assert_eq!(bases.len(), composed.len());

    let index = bases.iter().position(|&candidate| candidate == base)?;
    composed.get(index).copied()
}
541
/// Decodes `bytes` as ISO 8859 part `n`.
///
/// * Part 1 maps every byte to the code point of the same value and needs no
///   external tables.
/// * With the `std` feature, parts 2-11 and 13-15 are decoded through
///   `encoding_rs`; any other part number yields one `U+FFFD` per byte.
/// * Without the `std` feature, every part other than 1 yields one `U+FFFD`
///   per byte.
///
/// Bytes `0x80..=0x9F` are always emitted as `U+0080..=U+009F` so the caller's
/// control-code filter sees them. This matters for parts 9 and 11, which are
/// decoded with the windows-1254 / windows-874 tables: those tables assign
/// printable characters to that range, which would turn control codes such as
/// emphasis on/off into visible symbols.
fn decode_iso_8859(n: u8, bytes: &[u8]) -> String {
    if n == 1 {
        return bytes.iter().map(|&b| b as char).collect();
    }
    #[cfg(feature = "std")]
    {
        use encoding_rs::*;
        let encoding: &'static Encoding = match n {
            2 => ISO_8859_2,
            3 => ISO_8859_3,
            4 => ISO_8859_4,
            5 => ISO_8859_5,
            6 => ISO_8859_6,
            7 => ISO_8859_7,
            8 => ISO_8859_8,
            9 => WINDOWS_1254,
            10 => ISO_8859_10,
            11 => WINDOWS_874,
            13 => ISO_8859_13,
            14 => ISO_8859_14,
            15 => ISO_8859_15,
            _ => return bytes.iter().map(|_| '\u{FFFD}').collect(),
        };

        let is_c1 = |b: u8| (0x80..=0x9F).contains(&b);
        let mut out = String::with_capacity(bytes.len());
        let mut rest = bytes;

        // Walk the input in maximal runs of "C1 control" / "everything else".
        // These are stateless single-byte encodings, so splitting is safe.
        while let Some(&first) = rest.first() {
            let in_c1 = is_c1(first);
            let run_len = rest
                .iter()
                .position(|&b| is_c1(b) != in_c1)
                .unwrap_or(rest.len());
            let (run, tail) = rest.split_at(run_len);

            if in_c1 {
                out.extend(run.iter().map(|&b| char::from(b)));
            } else {
                // `decode` would sniff a BOM and could switch to UTF-8/UTF-16
                // for bodies starting with EF BB BF, FF FE or FE FF; the DVB
                // selector is authoritative, so BOM handling must stay off.
                let (text, _had_errors) = encoding.decode_without_bom_handling(run);
                out.push_str(&text);
            }
            rest = tail;
        }
        out
    }
    #[cfg(not(feature = "std"))]
    {
        let _ = n;
        bytes.iter().map(|_| '\u{FFFD}').collect()
    }
}
581
/// Decodes `bytes` with the given `encoding_rs` encoding, replacing malformed
/// sequences with `U+FFFD`.
///
/// BOM handling is deliberately off: the DVB selector byte has already fixed
/// the encoding, and `Encoding::decode` would reinterpret a body that starts
/// with `FE FF`, `FF FE` or `EF BB BF` as UTF-16 or UTF-8.
#[cfg(feature = "std")]
fn decode_with(encoding: &'static encoding_rs::Encoding, bytes: &[u8]) -> String {
    let (text, _had_errors) = encoding.decode_without_bom_handling(bytes);
    text.into_owned()
}
587
/// Decodes big-endian 16-bit code units into a `String`.
///
/// Each byte pair is read as one big-endian `u16` and the sequence is decoded
/// as UTF-16, so valid surrogate pairs are combined and unpaired surrogates
/// become `U+FFFD`. A trailing odd byte cannot form a code unit; it is
/// reported as one `U+FFFD` rather than dropped silently.
fn decode_ucs2_be(bytes: &[u8]) -> String {
    let pairs = bytes.chunks_exact(2);
    // `remainder` must be read before `pairs` is consumed by `map`.
    let has_dangling_byte = !pairs.remainder().is_empty();

    let code_units: Vec<u16> = pairs
        .map(|pair| u16::from_be_bytes([pair[0], pair[1]]))
        .collect();

    let mut text = String::from_utf16_lossy(&code_units);
    if has_dangling_byte {
        text.push('\u{FFFD}');
    }
    text
}
595
// Unit tests for the DVB text decoder.
//
// Tests whose expected output depends on the `encoding_rs` tables are gated on
// the `std` feature: without it those selectors decode to U+FFFD and the
// assertions could not hold.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn decode_empty_input_returns_empty_string() {
        assert_eq!(decode_dvb_string(&[]), "");
    }

    #[test]
    fn decode_plain_ascii_is_borrowed() {
        let cow = decode(b"HELLO");
        assert!(matches!(cow, Cow::Borrowed(_)));
        assert_eq!(cow, "HELLO");
    }

    #[test]
    fn decode_iso6937_latin_accent_chars() {
        assert_eq!(decode_dvb_string(&[0x00, 0xC2, b'A']), "Á");
        assert_eq!(decode_dvb_string(&[0x00, 0xC1, b'e']), "è");
        assert_eq!(decode_dvb_string(&[0x00, 0xC8, b'o']), "ö");
    }

    // Needs the ISO 8859-5 table from `encoding_rs`.
    #[cfg(feature = "std")]
    #[test]
    fn decode_selector_0x01_yields_iso8859_5_cyrillic() {
        let s = decode_dvb_string(&[0x01, 0xB0, 0xB1]);
        assert!(s.chars().all(|c| c != '\u{FFFD}'), "got: {s:?}");
        assert!(!s.is_empty());
    }

    // Part 9 is decoded through `encoding_rs`.
    #[cfg(feature = "std")]
    #[test]
    fn decode_selector_0x10_extended_yields_iso8859_nn() {
        let s = decode_dvb_string(&[0x10, 0x00, 0x09, b'A', b'B']);
        assert_eq!(s, "AB");
    }

    #[test]
    fn decode_selector_0x11_ucs2_be() {
        let s = decode_dvb_string(&[0x11, 0x00, 0x41, 0x00, 0x42]);
        assert_eq!(s, "AB");
    }

    #[test]
    fn decode_selector_0x15_utf8_passthrough() {
        let s = decode_dvb_string(&[0x15, 0xC3, 0xA9, 0xC3, 0xA9]);
        assert_eq!(s, "éé");
    }

    #[test]
    fn decode_control_chars_stripped_linefeed_becomes_space() {
        let s = decode_dvb_string(b"A\x01B\nC");
        assert_eq!(s, "AB C");
    }

    #[test]
    fn emphasis_on_off_markers_stripped_per_annex_a2() {
        let s = decode_dvb_string(&[0x00, b'A', 0x86, b'B', 0x87, b'C']);
        assert_eq!(s, "ABC");
    }

    #[test]
    fn decode_annex_a2_crlf_0x8a_becomes_space() {
        let s = decode_dvb_string(&[0x00, b'A', 0x8A, b'B']);
        assert_eq!(s, "A B");
    }

    #[cfg(feature = "std")]
    #[test]
    fn decode_selector_0x12_ksx1001_euc_kr() {
        assert_eq!(decode_dvb_string(&[0x12, 0xB0, 0xA1]), "가");
    }

    #[cfg(feature = "std")]
    #[test]
    fn decode_selector_0x13_gb2312() {
        assert_eq!(decode_dvb_string(&[0x13, 0xC4, 0xE3]), "你");
    }

    #[cfg(feature = "std")]
    #[test]
    fn decode_selector_0x14_big5() {
        assert_eq!(decode_dvb_string(&[0x14, 0xA4, 0xA4]), "中");
    }

    #[cfg(feature = "std")]
    #[test]
    fn decode_selector_0x13_gbk_trail_byte_in_c1_range() {
        assert_eq!(decode_dvb_string(&[0x13, 0x81, 0x80]), "亐");
    }

    // Relies on the GBK decoder to produce the two-byte control code points.
    #[cfg(feature = "std")]
    #[test]
    fn two_byte_control_codes_filtered() {
        assert_eq!(decode_dvb_string(&[0x13, 0xAB, 0xCD]), " ");
        assert_eq!(decode_dvb_string(&[0x13, 0xAB, 0xC3]), "");
    }

    #[test]
    fn decode_selector_0x1f_encoding_type_id() {
        let s = decode_dvb_string(&[0x1F, 0x01, 0x41, 0x42]);
        assert_eq!(s.chars().count(), 2);
        assert!(s.chars().all(|c| c == '\u{FFFD}'));
    }

    #[test]
    fn reserved_selector_0x08_is_unsupported() {
        let s = decode_dvb_string(&[0x08, 0x41, 0x42]);
        assert!(s.chars().all(|c| c == '\u{FFFD}'));
        assert_eq!(s.chars().count(), 2);
    }

    #[test]
    fn unknown_selector_returns_replacement_characters() {
        let s = decode_dvb_string(&[0x16, 0xAA, 0xBB, 0xCC]);
        assert_eq!(s.chars().count(), 3);
        assert!(s.chars().all(|c| c == '\u{FFFD}'));
    }

    #[test]
    fn selector_0x10_iso_8859_1_decodes_latin1() {
        let s = decode_dvb_string(&[0x10, 0x00, 0x01, 0x41, 0xE9]);
        assert_eq!(s, "Aé");
    }

    #[test]
    fn unsupported_iso_8859_12_yields_replacement() {
        let s = decode_dvb_string(&[0x10, 0x00, 0x0C, 0x41, 0x42]);
        assert!(s.chars().all(|c| c == '\u{FFFD}'), "got: {s:?}");
    }

    #[test]
    fn figure_a1_gr_area_single_byte_mappings() {
        let pins: &[(u8, char)] = &[
            (0xA0, '\u{00A0}'),
            (0xA1, '¡'),
            (0xA2, '¢'),
            (0xA3, '£'),
            (0xA4, '\u{20AC}'),
            (0xA5, '¥'),
            (0xA7, '§'),
            (0xA8, '\u{00A4}'),
            (0xA9, '\u{2018}'),
            (0xAA, '\u{201C}'),
            (0xAB, '«'),
            (0xAC, '\u{2190}'),
            (0xAD, '\u{2191}'),
            (0xAE, '\u{2192}'),
            (0xAF, '\u{2193}'),
            (0xB0, '°'),
            (0xB1, '±'),
            (0xB2, '²'),
            (0xB3, '³'),
            (0xB4, '\u{00D7}'),
            (0xB5, 'µ'),
            (0xB6, '¶'),
            (0xB7, '·'),
            (0xB8, '\u{00F7}'),
            (0xB9, '\u{2019}'),
            (0xBA, '\u{201D}'),
            (0xBB, '»'),
            (0xBC, '¼'),
            (0xBD, '½'),
            (0xBE, '¾'),
            (0xBF, '¿'),
            (0xD0, '\u{2015}'),
            (0xD1, '¹'),
            (0xD2, '®'),
            (0xD3, '©'),
            (0xD4, '\u{2122}'),
            (0xD5, '\u{266A}'),
            (0xD6, '¬'),
            (0xD7, '\u{00A6}'),
            (0xDC, '\u{215B}'),
            (0xDD, '\u{215C}'),
            (0xDE, '\u{215D}'),
            (0xDF, '\u{215E}'),
            (0xE0, '\u{2126}'),
            (0xE1, 'Æ'),
            (0xE2, '\u{0110}'),
            (0xE3, 'ª'),
            (0xE4, '\u{0126}'),
            (0xE6, '\u{0132}'),
            (0xE7, '\u{013F}'),
            (0xE8, '\u{0141}'),
            (0xE9, 'Ø'),
            (0xEA, '\u{0152}'),
            (0xEB, 'º'),
            (0xEC, 'Þ'),
            (0xED, '\u{0166}'),
            (0xEE, '\u{014A}'),
            (0xEF, '\u{0149}'),
            (0xF0, '\u{0138}'),
            (0xF1, 'æ'),
            (0xF2, '\u{0111}'),
            (0xF3, 'ð'),
            (0xF4, '\u{0127}'),
            (0xF5, '\u{0131}'),
            (0xF6, '\u{0133}'),
            (0xF7, '\u{0140}'),
            (0xF8, '\u{0142}'),
            (0xF9, 'ø'),
            (0xFA, '\u{0153}'),
            (0xFB, 'ß'),
            (0xFC, '\u{00FE}'),
            (0xFD, '\u{0167}'),
            (0xFE, '\u{014B}'),
            (0xFF, '\u{00AD}'),
        ];
        for &(byte, want) in pins {
            let got = decode_dvb_string(&[0x00, byte]);
            assert_eq!(
                got,
                want.to_string(),
                "byte {byte:#04x}: want {want:?} (U+{:04X}), got {got:?}",
                want as u32
            );
        }
    }

    #[test]
    fn figure_a1_undefined_positions_are_replacement() {
        for byte in [0xA6u8, 0xD8, 0xD9, 0xDA, 0xDB, 0xE5] {
            let got = decode_dvb_string(&[0x00, byte]);
            assert_eq!(got, "\u{FFFD}", "byte {byte:#04x} should be U+FFFD");
        }
    }

    #[test]
    fn figure_a1_combining_precomposed() {
        assert_eq!(decode_dvb_string(&[0x00, 0xCA, b'a']), "å");
        assert_eq!(decode_dvb_string(&[0x00, 0xCA, b'A']), "Å");
        assert_eq!(decode_dvb_string(&[0x00, 0xCF, b's']), "š");
        assert_eq!(decode_dvb_string(&[0x00, 0xCF, b'Z']), "Ž");
        assert_eq!(decode_dvb_string(&[0x00, 0xCE, b'e']), "ę");
        assert_eq!(decode_dvb_string(&[0x00, 0xCD, b'o']), "ő");
        assert_eq!(decode_dvb_string(&[0x00, 0xC7, b'z']), "ż");
        assert_eq!(decode_dvb_string(&[0x00, 0xC5, b'a']), "ā");
        assert_eq!(decode_dvb_string(&[0x00, 0xC6, b'g']), "ğ");
    }

    #[test]
    fn figure_a1_combining_fallback_emits_base_plus_mark() {
        assert_eq!(decode_dvb_string(&[0x00, 0xC5, b'x']), "x\u{0304}");
    }

    #[test]
    fn figure_a1_combining_undefined_or_dangling_prefix() {
        assert_eq!(decode_dvb_string(&[0x00, 0xC0, b'a']), "\u{FFFD}a");
        assert_eq!(decode_dvb_string(&[0x00, 0xC9, b'a']), "\u{FFFD}a");
        assert_eq!(decode_dvb_string(&[0x00, 0xCC, b'a']), "\u{FFFD}a");
        assert_eq!(decode_dvb_string(&[0x00, 0xC2]), "\u{FFFD}");
    }

    #[test]
    fn dvb_text_decodes_with_charset_selector() {
        let t = DvbText::new(&[0x15, 0xC3, 0xA9]);
        assert_eq!(t.decode(), "é");
        assert_eq!(t.raw(), &[0x15, 0xC3, 0xA9]);
        assert_eq!(&t[..], &[0x15, 0xC3, 0xA9]);
        assert_eq!(format!("{t}"), "é");
    }

    #[test]
    fn lang_code_as_str() {
        assert_eq!(LangCode(*b"fre").as_str(), "fre");
        assert_eq!(LangCode([0xFF, b'r', b'e']).as_str(), "\u{FFFD}re");
    }

    #[cfg(feature = "serde")]
    #[test]
    fn dvb_text_serializes_decoded() {
        let t = DvbText::new(&[0x15, 0xC3, 0xA9]);
        assert_eq!(serde_json::to_string(&t).unwrap(), "\"é\"");
    }

    #[cfg(feature = "serde")]
    #[test]
    fn lang_code_serializes_as_string() {
        let lc = LangCode(*b"FRA");
        assert_eq!(serde_json::to_string(&lc).unwrap(), "\"FRA\"");
    }
}