Skip to main content

teletex/
lib.rs

1#![doc = include_str!("../README.md")]
2#![no_std]
3use core::iter::{Iterator, FusedIterator, ExactSizeIterator};
4
/// Report whether the Teletex byte `c` is one of the diacritic codes,
/// i.e. lies in the inclusive range `0xC1..=0xCF`.
#[inline]
pub const fn is_teletex_diacritic(c: u8) -> bool {
    matches!(c, 0xC1..=0xCF)
}
10
/// Convert a single Teletex byte to its equivalent Unicode character.
///
/// * ASCII bytes (`0x00..=0x7F`) are returned unchanged.
/// * The listed graphic bytes map to their Unicode counterparts.
/// * Diacritic bytes (`0xC1..=0xCF`) map to the corresponding Unicode
///   combining mark. This function does not reorder anything; it only
///   translates the one byte it is given.
/// * Every other byte yields U+FFFD REPLACEMENT CHARACTER.
///
/// Despite the name, the result is a `char` (a Unicode scalar value), not
/// UTF-8 encoded bytes.
///
/// All non-ASCII results are written as `\u{...}` escapes so that each arm is
/// guaranteed to be exactly one code point, regardless of how an editor or
/// tool normalizes the source text (the ligatures and U+0149 decompose into
/// two characters under compatibility normalization, which is not a valid
/// `char` literal).
pub const fn teletex_char_to_utf8_char (c: u8) -> char {
    match c {
        // Symbols whose position differs from ASCII / Latin-1.
        0xA4 => '$',
        0xA6 => '#',
        0xA8 => '\u{00A4}', // CURRENCY SIGN
        0xB4 => '\u{00D7}', // MULTIPLICATION SIGN
        0xB8 => '\u{00F7}', // DIVISION SIGN

        // Upper-case and miscellaneous letters.
        0xE0 => '\u{2126}', // OHM SIGN
        0xE1 => '\u{00C6}', // LATIN CAPITAL LETTER AE
        0xE2 => '\u{00D0}', // LATIN CAPITAL LETTER ETH
        0xE3 => '\u{00AA}', // FEMININE ORDINAL INDICATOR
        0xE4 => '\u{0126}', // LATIN CAPITAL LETTER H WITH STROKE
        0xE6 => '\u{0132}', // LATIN CAPITAL LIGATURE IJ
        0xE7 => '\u{013F}', // LATIN CAPITAL LETTER L WITH MIDDLE DOT
        0xE8 => '\u{0141}', // LATIN CAPITAL LETTER L WITH STROKE
        0xE9 => '\u{00D8}', // LATIN CAPITAL LETTER O WITH STROKE
        0xEA => '\u{0152}', // LATIN CAPITAL LIGATURE OE
        0xEB => '\u{00BA}', // MASCULINE ORDINAL INDICATOR
        0xEC => '\u{00DE}', // LATIN CAPITAL LETTER THORN
        0xED => '\u{0166}', // LATIN CAPITAL LETTER T WITH STROKE
        0xEE => '\u{014A}', // LATIN CAPITAL LETTER ENG
        0xEF => '\u{0149}', // LATIN SMALL LETTER N PRECEDED BY APOSTROPHE

        // Lower-case letters.
        0xF0 => '\u{0138}', // LATIN SMALL LETTER KRA
        0xF1 => '\u{00E6}', // LATIN SMALL LETTER AE
        0xF2 => '\u{0111}', // LATIN SMALL LETTER D WITH STROKE
        0xF3 => '\u{00F0}', // LATIN SMALL LETTER ETH
        0xF4 => '\u{0127}', // LATIN SMALL LETTER H WITH STROKE
        0xF5 => '\u{0131}', // LATIN SMALL LETTER DOTLESS I
        0xF6 => '\u{0133}', // LATIN SMALL LIGATURE IJ
        0xF7 => '\u{0140}', // LATIN SMALL LETTER L WITH MIDDLE DOT
        0xF8 => '\u{0142}', // LATIN SMALL LETTER L WITH STROKE
        0xF9 => '\u{00F8}', // LATIN SMALL LETTER O WITH STROKE
        0xFA => '\u{0153}', // LATIN SMALL LIGATURE OE
        0xFB => '\u{00DF}', // LATIN SMALL LETTER SHARP S
        0xFC => '\u{00FE}', // LATIN SMALL LETTER THORN
        0xFD => '\u{0167}', // LATIN SMALL LETTER T WITH STROKE
        0xFE => '\u{014B}', // LATIN SMALL LETTER ENG

        // Diacritics, translated to Unicode combining marks.
        0xC1 => '\u{0300}', // COMBINING GRAVE ACCENT
        0xC2 => '\u{0301}', // COMBINING ACUTE ACCENT
        0xC3 => '\u{0302}', // COMBINING CIRCUMFLEX ACCENT
        0xC4 => '\u{0303}', // COMBINING TILDE
        0xC5 => '\u{0304}', // COMBINING MACRON
        0xC6 => '\u{0306}', // COMBINING BREVE
        0xC7 => '\u{0307}', // COMBINING DOT ABOVE
        0xC8 => '\u{0308}', // COMBINING DIAERESIS
        0xC9 => '\u{0308}', // COMBINING DIAERESIS (same mark as 0xC8)
        0xCA => '\u{030A}', // COMBINING RING ABOVE
        0xCB => '\u{0327}', // COMBINING CEDILLA
        0xCC => '\u{0332}', // COMBINING LOW LINE
        0xCD => '\u{030B}', // COMBINING DOUBLE ACUTE ACCENT
        0xCE => '\u{0328}', // COMBINING OGONEK
        0xCF => '\u{030C}', // COMBINING CARON

        // Plain ASCII passes through untouched.
        0x00..=0x7F => c as char,

        // Anything not listed above has no mapping here.
        _ => '\u{FFFD}', // REPLACEMENT CHARACTER
    }
}
74
/// Iterator that decodes a Teletex byte string into Unicode `char`s.
///
/// Values of this type are produced by [`teletex_to_utf8`]. The iterator
/// borrows the input for `'a` and never allocates.
#[derive(Debug, Clone)]
pub struct TeletexToUnicodeChars<'a> {
    /// Teletex bytes that have not been consumed yet.
    teletex: &'a [u8],
    /// Combining mark held back so that it is emitted by the next call to
    /// `next()`, i.e. directly after the base letter it belongs to.
    diacritic: Option<char>,
}
80
81impl <'a> TeletexToUnicodeChars<'a> {
82
83    #[inline]
84    pub(crate) const fn new(teletex: &'a [u8]) -> TeletexToUnicodeChars<'a> {
85        TeletexToUnicodeChars{ teletex, diacritic: None }
86    }
87
88}
89
/// U+FFFD REPLACEMENT CHARACTER, emitted where the input cannot be decoded.
const REPLACEMENT_CHAR: char = char::REPLACEMENT_CHARACTER;
91
92impl <'a> Iterator for TeletexToUnicodeChars<'a> {
93    type Item = char;
94
95    fn next(&mut self) -> Option<Self::Item> {
96        if let Some(diac) = self.diacritic.take() {
97            return Some(diac);
98        }
99        let tb = *self.teletex.first()?;
100        self.teletex = &self.teletex[1..];
101        if !is_teletex_diacritic(tb) {
102            return Some(teletex_char_to_utf8_char(tb));
103        }
104        let tb2 = *self.teletex.first()?;
105        self.teletex = &self.teletex[1..];
106        if !tb2.is_ascii_alphabetic() {
107            // If the diacritic seems misplaced, just return replacement character.
108            return Some(REPLACEMENT_CHAR);
109        }
110        // In UTF-8 the diacritic comes after the letter.
111        // In Teletex, it comes before. We swap here.
112        self.diacritic = Some(teletex_char_to_utf8_char(tb));
113        Some(teletex_char_to_utf8_char(tb2))
114    }
115
116    fn size_hint(&self) -> (usize, Option<usize>) {
117        let len = self.teletex.len() + if self.diacritic.is_some() { 1 } else { 0 };
118        (len, Some(len))
119    }
120
121}
122
123impl <'a> FusedIterator for TeletexToUnicodeChars<'a> {}
124impl <'a> ExactSizeIterator for TeletexToUnicodeChars<'a> {}
125
126/// Convert Teletex to UTF-8, character by character.
127///
128/// ## Example Usage
129///
130/// ```rust
131/// use teletex::teletex_to_utf8;
132/// let input = b"Big\xA4Money\xA4";
133/// for utf8char in teletex_to_utf8(input) {
134///     // ...utf8char is the UTF-8 equivalent
135/// }
136/// ```
137#[inline]
138pub const fn teletex_to_utf8 <'a> (bytes: &'a [u8]) -> TeletexToUnicodeChars<'a> {
139    TeletexToUnicodeChars::new(bytes)
140}
141
#[cfg(test)]
mod tests {
    extern crate alloc;
    use super::teletex_to_utf8;
    use alloc::string::String;

    /// Run the decoder over all of `input` and gather the chars into a `String`.
    fn decode(input: &[u8]) -> String {
        teletex_to_utf8(input).collect()
    }

    /// Bytes whose Teletex meaning differs from ASCII are remapped.
    #[test]
    fn it_translates_unequivalent_chars() {
        assert_eq!(decode(b"Big\xA4Money\xA4"), "Big$Money$");
    }

    /// A diacritic byte before a letter comes out as a combining mark after it.
    #[test]
    fn it_transposes_and_translates_diacritics() {
        assert_eq!(decode(b"BigB\xC4o\xC5i"), "BigBo\u{0303}i\u{0304}");
    }

    /// Empty input decodes to an empty string.
    #[test]
    fn it_decodes_an_empty_string() {
        assert_eq!(decode(b""), "");
    }

}