codepage_437/
encode.rs

1use self::super::Cp437Dialect;
2use std::borrow::Cow;
3
4
/// Error which can occur when attempting to interpret a string as a sequence of cp437 codepoints.
///
/// [`to_cp437()`](trait.ToCp437.html#tymethod.to_cp437) returns this error directly, and
/// [`IntoCp437Error`](struct.IntoCp437Error.html) carries one alongside the original string.
///
/// The type implements `Display` and `std::error::Error`, so it can be propagated with `?`
/// into a `Box<dyn Error>`.
#[derive(Debug, Copy, Clone, Hash, Eq, Ord, PartialEq, PartialOrd)]
pub struct Cp437Error {
    /// The index in the given string up to which valid cp437 was verified.
    ///
    /// It is the maximum index such that `input[..index].to_cp437()` would return `Ok(_)`.
    ///
    /// # Examples
    ///
    /// ```
    /// # use codepage_437::{CP437_CONTROL, ToCp437};
    /// // some unrepresentable characters, in a &str
    /// let word = "Eżektor";
    ///
    /// // ToCp437::to_cp437() returns a Cp437Error
    /// let error = word.to_cp437(&CP437_CONTROL).unwrap_err();
    ///
    /// // the second character is unrepresentable here
    /// assert_eq!(error.representable_up_to, 1);
    /// ```
    pub representable_up_to: usize,
}

impl std::fmt::Display for Cp437Error {
    /// Writes a one-line, human-readable description that includes `representable_up_to`.
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        write!(f, "string is not representable in cp437 past index {}", self.representable_up_to)
    }
}

impl std::error::Error for Cp437Error {}
29
30/// A possible error value when converting a `String` into a cp437 byte vector.
31///
32/// This type is the error type for the [`into_cp437()`](trait.IntoCp437.html#tymethod.into_cp437)
33/// function on [`IntoCp437`](trait.IntoCp437.html). It is designed in such a way to carefully avoid reallocations:
34/// the [`into_string()`](#method.into_string) function will give back the String that was used
35/// in the conversion attempt.
36///
37/// The [`Cp437Error`](struct.Cp437Error.html) type represents an error that may
38/// occur when converting a `&str` to a sequence of `u8`s. In this sense, it's
39/// an analogue to `IntoCp437Error`, and you can get one from a `IntoCp437Error`
40/// through the [`cp437_error()`](#method.cp437_error) function.
41///
42/// # Examples
43///
44/// ```
45/// # use codepage_437::{CP437_CONTROL, IntoCp437};
46/// // some unrepresentable chracters, in a String
47/// let word = "Eżektor".to_string();
48///
49/// let value = word.into_cp437(&CP437_CONTROL);
50///
51/// assert!(value.is_err());
52/// assert_eq!(value.unwrap_err().into_string(), "Eżektor".to_string());
53/// ```
54#[derive(Debug, Clone, Hash, Eq, Ord, PartialEq, PartialOrd)]
55pub struct IntoCp437Error {
56    string: String,
57    error: Cp437Error,
58}
59
60impl IntoCp437Error {
61    /// Returns a `&str` that was attempted to convert to cp437.
62    ///
63    /// # Examples
64    ///
65    /// ```
66    /// # use codepage_437::{CP437_CONTROL, IntoCp437};
67    /// // some unrepresentable chracters, in a String
68    /// let word = "Eżektor".to_string();
69    ///
70    /// let value = word.into_cp437(&CP437_CONTROL);
71    ///
72    /// assert_eq!(value.unwrap_err().as_str(), "Eżektor");
73    /// ```
74    pub fn as_str(&self) -> &str {
75        &self.string
76    }
77
78    /// Returns the `String` that was attempted to convert to cp437.
79    ///
80    /// This function is carefully constructed to avoid allocation. It will
81    /// consume the error, moving out the string, so that a copy of the string
82    /// does not need to be made.
83    ///
84    /// # Examples
85    ///
86    /// ```
87    /// # use codepage_437::{CP437_CONTROL, IntoCp437};
88    /// // some unrepresentable chracters, in a String
89    /// let word = "Eżektor".to_string();
90    ///
91    /// let value = word.into_cp437(&CP437_CONTROL);
92    ///
93    /// assert_eq!(value.unwrap_err().into_string(), "Eżektor".to_string());
94    /// ```
95    pub fn into_string(self) -> String {
96        self.string
97    }
98
99    /// Fetch a `Cp437Error` to get more details about the conversion failure.
100    ///
101    /// The [`Cp437Error`](struct.Cp437Error.html) type represents an error that may
102    /// occur when converting a `&str` to a sequence of `u8`s. In this sense, it's
103    /// an analogue to `IntoCp437Error`. See its documentation for more details
104    /// on using it.
105    ///
106    /// # Examples
107    ///
108    /// ```
109    /// # use codepage_437::{CP437_CONTROL, IntoCp437};
110    /// // some unrepresentable chracters, in a String
111    /// let word = "Eżektor".to_string();
112    ///
113    /// let error = word.into_cp437(&CP437_CONTROL).unwrap_err().cp437_error();
114    ///
115    /// // the first character is unrepresentable here
116    /// assert_eq!(error.representable_up_to, 1);
117    /// ```
118    pub fn cp437_error(&self) -> Cp437Error {
119        self.error
120    }
121}
122
123
/// Move Unicode data to a container of cp437 data.
///
/// The conversion consumes `self`. On failure the returned
/// [`IntoCp437Error`](struct.IntoCp437Error.html) gives the original string back
/// through its [`into_string()`](struct.IntoCp437Error.html#method.into_string) function.
///
/// # Examples
///
/// Good:
///
/// ```
/// # use codepage_437::{CP437_CONTROL, IntoCp437};
/// let cp437 = vec![0x4C, 0x6F, 0x63, 0x61, 0x6C, 0x20, 0x6E, 0x65, 0x77, 0x73, 0x20, 0x72, 0x65,
///                  0x70, 0x6F, 0x72, 0x74, 0x73, 0x20, 0x74, 0x68, 0x61, 0x74, 0x20, 0x74, 0x68,
///                  0x65, 0x20, 0x9E, 0xAB, 0x20, 0x6D, 0x69, 0x6C, 0x6C, 0x69, 0x6F, 0x6E, 0x20,
///                  0x41, 0x69, 0x72, 0x20, 0x4D, 0x65, 0x6C, 0x61, 0x6E, 0x65, 0x73, 0x69, 0x91,
///                  0x20, 0x61, 0x69, 0x72, 0x63, 0x72, 0x61, 0x66, 0x74, 0x20, 0x68, 0x61, 0x73,
///                  0x20, 0x63, 0x72, 0x61, 0x73, 0x68, 0x65, 0x64, 0x20, 0x74, 0x68, 0x69, 0x73,
///                  0x20, 0x6D, 0x6F, 0x72, 0x6E, 0x69, 0x6E, 0x67, 0x20, 0x61, 0x72, 0x6F, 0x75,
///                  0x6E, 0x64, 0x20, 0x39, 0x3A, 0x30, 0x30, 0x61, 0x6D, 0x2E];
/// let unicode =
///     "Local news reports that the ₧½ million Air Melanesiæ aircraft has crashed this morning around 9:00am.".to_string();
///
/// assert_eq!(unicode.into_cp437(&CP437_CONTROL), Ok(cp437));  // `unicode` is moved into the call
/// ```
///
/// Unrepresentable:
///
/// ```
/// # use codepage_437::{CP437_CONTROL, IntoCp437};
/// // ż cannot be represented in cp437
/// let unicode = "Jurek je żurek w żupanie.".to_string();
///
/// let error = unicode.into_cp437(&CP437_CONTROL).unwrap_err();  // `unicode` is moved into the call
/// assert_eq!(error.as_str(), "Jurek je żurek w żupanie.");
/// assert_eq!(error.cp437_error().representable_up_to, 9);
///
/// let unicode = error.into_string();                   // `unicode` is now the same as the original
/// # assert_eq!(unicode, "Jurek je żurek w żupanie.");
/// ```
pub trait IntoCp437<T> {
    /// Do the conversion.
    ///
    /// # Errors
    ///
    /// Returns an [`IntoCp437Error`](struct.IntoCp437Error.html) when the data
    /// cannot be converted to cp437 under `dialect`.
    fn into_cp437(self, dialect: &Cp437Dialect) -> Result<T, IntoCp437Error>;
}
164
165impl IntoCp437<Vec<u8>> for String {
166    fn into_cp437(self, dialect: &Cp437Dialect) -> Result<Vec<u8>, IntoCp437Error> {
167        if self.chars().all(|c| dialect.overlap_unicode(c)) {
168            Ok(self.into_bytes())
169        } else {
170            to_cp437_impl_meat(&self, dialect).map_err(|e| {
171                IntoCp437Error {
172                    string: self,
173                    error: e,
174                }
175            })
176        }
177    }
178}
179
180
/// Borrow (if possible) Unicode data as cp437 data.
///
/// The conversion only borrows `self`, so the input remains usable afterwards
/// whether or not the conversion succeeds.
///
/// # Examples
///
/// Good:
///
/// ```
/// # use codepage_437::{CP437_CONTROL, ToCp437};
/// let cp437 = [0x4C, 0x6F, 0x63, 0x61, 0x6C, 0x20, 0x6E, 0x65, 0x77, 0x73, 0x20, 0x72, 0x65,
///              0x70, 0x6F, 0x72, 0x74, 0x73, 0x20, 0x74, 0x68, 0x61, 0x74, 0x20, 0x74, 0x68,
///              0x65, 0x20, 0x9E, 0xAB, 0x20, 0x6D, 0x69, 0x6C, 0x6C, 0x69, 0x6F, 0x6E, 0x20,
///              0x41, 0x69, 0x72, 0x20, 0x4D, 0x65, 0x6C, 0x61, 0x6E, 0x65, 0x73, 0x69, 0x91,
///              0x20, 0x61, 0x69, 0x72, 0x63, 0x72, 0x61, 0x66, 0x74, 0x20, 0x68, 0x61, 0x73,
///              0x20, 0x63, 0x72, 0x61, 0x73, 0x68, 0x65, 0x64, 0x20, 0x74, 0x68, 0x69, 0x73,
///              0x20, 0x6D, 0x6F, 0x72, 0x6E, 0x69, 0x6E, 0x67, 0x20, 0x61, 0x72, 0x6F, 0x75,
///              0x6E, 0x64, 0x20, 0x39, 0x3A, 0x30, 0x30, 0x61, 0x6D, 0x2E];
/// let unicode = "Local news reports that the ₧½ million Air Melanesiæ aircraft has crashed this morning around 9:00am.";
///
/// assert_eq!(unicode.to_cp437(&CP437_CONTROL), Ok(cp437[..].into()));
/// ```
///
/// Unrepresentable:
///
/// ```
/// # use codepage_437::{CP437_CONTROL, ToCp437};
/// // ż cannot be represented in cp437
/// let unicode = "Jurek je żurek w żupanie.";
///
/// let error = unicode.to_cp437(&CP437_CONTROL).unwrap_err();
/// assert_eq!(error.representable_up_to, 9);
/// ```
pub trait ToCp437<'s, T> {
    /// Do the conversion.
    ///
    /// # Errors
    ///
    /// Returns a [`Cp437Error`](struct.Cp437Error.html) when the data cannot be
    /// converted to cp437 under `dialect`.
    fn to_cp437(&'s self, dialect: &Cp437Dialect) -> Result<T, Cp437Error>;
}
216
217impl<'s> ToCp437<'s, Cow<'s, [u8]>> for str {
218    fn to_cp437(&'s self, dialect: &Cp437Dialect) -> Result<Cow<'s, [u8]>, Cp437Error> {
219        to_cp437_cow_impl(&self, dialect)
220    }
221}
222
223impl<'s, S: AsRef<str>> ToCp437<'s, Cow<'s, [u8]>> for S {
224    fn to_cp437(&'s self, dialect: &Cp437Dialect) -> Result<Cow<'s, [u8]>, Cp437Error> {
225        to_cp437_cow_impl(self.as_ref(), dialect)
226    }
227}
228
229
230fn to_cp437_cow_impl<'c>(whom: &'c str, dialect: &Cp437Dialect) -> Result<Cow<'c, [u8]>, Cp437Error> {
231    if whom.chars().all(|c| dialect.overlap_unicode(c)) {
232        Ok(Cow::Borrowed(whom.as_bytes()))
233    } else {
234        to_cp437_impl_meat(whom, dialect).map(Cow::Owned)
235    }
236}
237
238fn to_cp437_impl_meat(whom: &str, dialect: &Cp437Dialect) -> Result<Vec<u8>, Cp437Error> {
239    let mut result = Vec::with_capacity(whom.chars().count());
240
241    for c in whom.chars() {
242        if let Some(b) = dialect.encode(c) {
243            result.push(b);
244        } else {
245            return Err(Cp437Error { representable_up_to: result.len() });
246        }
247    }
248
249    Ok(result)
250}