Skip to main content

mutf8/
lib.rs

1//!
2//! This is more or less just an initial implementation.
//! Currently, only raw conversions from MUTF-8 to UTF-8 (`mutf8_to_utf8`) and from UTF-8 to MUTF-8 (`utf8_to_mutf8`) are supported.
4//! Ideally, this will turn into a "complete enough" mutf8 library for use in other libs/apps.
5//!
6
7mod mutf8;
8
9#[cfg(feature = "use-structs")]
10mod str;
11
12pub use mutf8::mutf8_to_utf8;
13pub use mutf8::utf8_to_mutf8;
14
15#[cfg(feature = "use-structs")]
16pub use crate::str::MString;
17
18#[cfg(feature = "use-structs")]
19pub use crate::str::mstr;
20
21
pub mod error {
	use std::fmt::{Display, Formatter, Result as FResult};
	use std::string::FromUtf8Error;
	use std::str::Utf8Error;

	/// Result alias whose error type defaults to this module's [`Error`].
	pub type Result<T, E = Error> = std::result::Result<T, E>;

	/// Errors reported while transcoding between UTF-8 and MUTF-8.
	#[derive(Debug, Clone, PartialEq, Eq)]
	pub enum Error {
		/// The input ended in the middle of a multi-byte sequence.
		///
		/// The fields describe the transcoding direction, the kind of sequence that was being
		/// processed, and which byte of that sequence was missing.
		EndOfInput(Mode, Expected, Position),
		/// The data was not valid UTF-8.
		InvalidUtf8 {
			/// The offending bytes, when the failed conversion owned them
			/// (set when converting from a `FromUtf8Error`, `None` when converting from a `Utf8Error`).
			bytes: Option<Vec<u8>>,
			/// The underlying standard library UTF-8 error.
			error: Utf8Error,
		},
	}

	impl Display for Error {
		/// Writes a human readable description of the error.
		///
		/// The `EndOfInput` message is assembled from its three components, so every
		/// combination of `Mode`, `Expected` and `Position` is rendered. Combinations that the
		/// transcoder is not expected to produce (for example a fifth byte of a two byte
		/// encoding) are still printed instead of panicking, because all of these types are
		/// public and can be combined freely by callers.
		fn fmt(&self, f: &mut Formatter<'_>) -> FResult {
			match self {
				Self::EndOfInput(mode, expected, position) => {
					let action = match mode {
						Mode::Encoding => "encode",
						Mode::Decoding => "decode",
					};
					let width = match expected {
						Expected::TwoByte => "two",
						Expected::ThreeByte => "three",
						Expected::FourByte => "four",
						Expected::SixByte => "six",
					};
					let ordinal = match position {
						Position::Two => "Second",
						Position::Three => "Third",
						Position::Four => "Fourth",
						Position::Five => "Fifth",
						Position::Six => "Sixth",
					};
					write!(
						f,
						"Unexpected end of input. [Unable to {} a {} byte encoding. ({} byte)]",
						action, width, ordinal
					)
				},

				Self::InvalidUtf8 { error, .. } => {
					f.write_str("Invalid UTF-8 input. [Failed to decode string into UTF-8 (")?;
					// Delegate to the inner error with the caller's formatter.
					Display::fmt(error, f)?;
					f.write_str(")]")
				},
			}
		}
	}

	impl std::error::Error for Error {
	}

	impl From<Utf8Error> for Error {
		/// Wraps a borrowed-input UTF-8 failure; no bytes are available to keep.
		fn from(err: Utf8Error) -> Self {
			Error::InvalidUtf8 {
				bytes: None,
				error: err
			}
		}
	}

	impl From<FromUtf8Error> for Error {
		/// Wraps an owned-input UTF-8 failure, keeping the rejected bytes.
		fn from(err: FromUtf8Error) -> Self {
			// `utf8_error` borrows, so it has to be read before `into_bytes` consumes `err`.
			let error = err.utf8_error();
			let bytes = err.into_bytes();
			Error::InvalidUtf8 {
				bytes: Some(bytes),
				error,
			}
		}
	}

	/// Used to describe the transcoding state.
	/// We define encoding as going to MUTF-8.
	#[derive(Debug, Clone, Copy, PartialEq, Eq)]
	pub enum Mode {
		/// UTF-8 being encoded, and converted into MUTF-8.
		Encoding,
		/// MUTF-8 being decoded, and converted into UTF-8.
		Decoding,
	}

	/// What specifically the conversion functions were trying to encode/decode before they ran into an issue.
	#[derive(Debug, Clone, Copy, PartialEq, Eq)]
	pub enum Expected {
		/// The UTF-8 and MUTF-8 specifications both define two byte encodings.
		/// To determine which one is meant, examine the `Mode` stored next to this value in the error
		/// (it describes whether the error occurred during encoding or decoding).
		TwoByte,
		/// The UTF-8 and MUTF-8 specifications both define three byte encodings.
		/// To determine which one is meant, examine the `Mode` stored next to this value in the error
		/// (it describes whether the error occurred during encoding or decoding).
		///
		/// Due to how the six byte encoding is laid out, a truncated six byte encoding can be incorrectly reported as a three byte error.
		ThreeByte,
		/// Only the UTF-8 specification defines a four byte encoding.
		/// The four byte equivalent in MUTF-8 is defined as a six byte encoding.
		FourByte,
		/// Only the MUTF-8 specification defines a six byte encoding.
		/// It is MUTF-8's representation of UTF-8's four byte encoding.
		///
		/// Due to how the six byte encoding is laid out, it can be incorrectly reported as a three byte error.
		/// The information on whether a three byte encoding is part of a six byte encoding is stored in the second and third byte,
		/// so if those bytes are cut off, that information is lost.
		SixByte,
	}

	/// The position of the current byte during the encoding/decoding phase.
	/// Only some of the positions are valid for some combinations of the bytes.
	///
	/// For example, it doesn't make any sense for there to be an error like: (Expected::TwoByte, Position::Five).
	/// There is no fifth byte to be read as it's only trying to read two bytes, so such a value indicates an internal error.
	/// Formatting an error that carries such a combination still produces a message rather than panicking.
	#[derive(Debug, Clone, Copy, PartialEq, Eq)]
	pub enum Position {
		/// The second byte of the sequence.
		Two,
		/// The third byte of the sequence.
		Three,
		/// The fourth byte of the sequence.
		Four,
		/// The fifth byte of the sequence.
		Five,
		/// The sixth byte of the sequence.
		Six,
	}
}