1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
use std::borrow::Cow;

use thiserror::Error;

pub mod code_pages;
mod internal;

#[derive(Error, Debug)]
#[error("Character in UTF-8 string has no mapping defined in code page")]
pub struct EncodeError {}

/// Conversion between UTF-8 strings and a single-byte code page
///
/// Encoding is provided through default methods that delegate to the
/// `encode_helper` method of the `Encoder` supertrait; decoding methods must
/// be supplied by each implementor.
pub trait CodePage: crate::internal::Encoder {
    /// Encode UTF-8 string into single-byte encoding
    ///
    /// Undefined characters will result in [`EncodeError`]
    ///
    /// # Errors
    ///
    /// Returns [`EncodeError`] if `s` contains a character that has no
    /// mapping in this code page.
    ///
    /// # Examples
    ///
    /// ```
    /// use yore::{CodePage, EncodeError};
    ///
    /// // Erase type for example - prefer concrete type over trait object whenever possible
    /// let cp850: &dyn CodePage = &yore::code_pages::CP850;
    /// assert_eq!(cp850.encode("text").unwrap(), vec![116, 101, 120, 116]);
    /// assert!(matches!(cp850.encode("text 🦀"), Err(EncodeError {})));
    /// ```
    #[inline]
    fn encode<'a>(&self, s: &'a str) -> Result<Cow<'a, [u8]>, EncodeError> {
        // No fallback byte: an unmappable character is reported as an error.
        self.encode_helper(s, None)
    }

    /// Encode UTF-8 string into single-byte encoding
    ///
    /// Undefined characters will be replaced with byte `fallback`
    ///
    /// # Panics
    ///
    /// Panics only if the underlying encoder reports an error even though a
    /// fallback byte was supplied, which would indicate a bug in the encoder.
    ///
    /// # Examples
    ///
    /// ```
    /// use yore::CodePage;
    ///
    /// // Erase type for example - prefer concrete type over trait object whenever possible
    /// let cp850: &dyn CodePage = &yore::code_pages::CP850;
    /// assert_eq!(cp850.encode_lossy("text 🦀", 168), vec![116, 101, 120, 116, 32, 168]);
    /// ```
    #[inline]
    fn encode_lossy<'a>(&self, s: &'a str, fallback: u8) -> Cow<'a, [u8]> {
        // With a fallback byte every character has a substitute, so the
        // helper is expected to always succeed.
        self.encode_helper(s, Some(fallback))
            .expect("encoding with a fallback byte must not fail")
    }

    /// Decode single-byte encoding into UTF-8 string
    ///
    /// Undefined codepoints will result in [`DecodeError`]
    ///
    /// # Errors
    ///
    /// Returns [`DecodeError`] carrying the offset and value of an undefined
    /// byte found in `bytes`.
    ///
    /// # Examples
    ///
    /// ```
    /// use yore::{CodePage, DecodeError};
    ///
    /// // Erase types for example - prefer concrete type over trait object whenever possible
    /// let cp850: &dyn CodePage = &yore::code_pages::CP850;
    /// let cp857: &dyn CodePage = &yore::code_pages::CP857;
    /// assert_eq!(cp850.decode(&[116, 101, 120, 116]).unwrap(), "text");
    ///
    /// //codepoint 231 is undefined
    /// assert!(matches!(cp857.decode(&[116, 101, 120, 116, 231]), Err(DecodeError{position: 4, value: 231})));
    /// ```
    fn decode<'a>(&self, bytes: &'a [u8]) -> Result<Cow<'a, str>, DecodeError>;

    /// Decode single-byte encoding into UTF-8 string
    ///
    /// Undefined codepoints will be replaced with `'�'`
    ///
    /// # Examples
    ///
    /// ```
    /// use yore::CodePage;
    ///
    /// // Erase type for example - prefer concrete type over trait object whenever possible
    /// let cp857: &dyn CodePage = &yore::code_pages::CP857;
    /// //codepoint 231 is undefined
    /// assert_eq!(cp857.decode_lossy(&[116, 101, 120, 116, 32, 231]), "text �");
    /// ```
    fn decode_lossy<'a>(&self, bytes: &'a [u8]) -> Cow<'a, str>;

    /// Decode single-byte encoding into UTF-8 string
    ///
    /// Undefined codepoints will be replaced with `fallback`
    ///
    /// # Examples
    ///
    /// ```
    /// use yore::CodePage;
    ///
    /// // Erase type for example - prefer concrete type over trait object whenever possible
    /// let cp857: &dyn CodePage = &yore::code_pages::CP857;
    /// //codepoint 231 is undefined
    /// assert_eq!(cp857.decode_lossy_fallback(&[116, 101, 120, 116, 32, 231], '�'), "text �");
    /// ```
    fn decode_lossy_fallback<'a>(&self, bytes: &'a [u8], fallback: char) -> Cow<'a, str>;
}

#[derive(Error, Debug)]
#[error("Undefined codepoint {value} at offset {position}")]
pub struct DecodeError {
    pub position: usize,
    pub value: u8,
}