encode_unicode 0.2.0

Alternative and extension to the unstable `char.encode_utf8()` and `char.encode_utf16()` methods. Also has UTF-8- and UTF-16-encoded character types.
Documentation
/* Copyright 2016 Torbjørn Birch Moltu
 *
 * Licensed under the Apache License, Version 2.0, <LICENSE-APACHE or
 * http://apache.org/licenses/LICENSE-2.0> or the MIT license <LICENSE-MIT or
 * http://opensource.org/licenses/MIT>, at your option. This file may not be
 * copied, modified, or distributed except according to those terms.
 */


//! Boilerplatey error enums

extern crate core;
use self::core::fmt::{self,Display,Formatter};
#[cfg(not(feature="no_std"))]
use std::error::Error;


// Generates a public, fieldless error enum together with its boilerplate:
// a `description()` method (inherent when the `no_std` feature is enabled,
// through `Error` otherwise) and a `Display` impl that prints that description.
//
// Input: one doc attribute for the type, the type name, and then a
// comma-terminated list of `::Variant => "description"` entries, each of which
// may be preceded by its own doc attributes.
macro_rules! simple {
    (
        #[$type_doc:meta]
        $name:ident {
            $(
                $(#[$variant_doc:meta])*
                ::$variant:ident => $text:expr
            ),+,
        }
    ) => {
        #[$type_doc]
        #[derive(Clone,Copy, Debug, PartialEq,Eq)]
        pub enum $name {
            $(
                $(#[$variant_doc])*
                $variant
            ),*
        }

        // Without `std` there is no `Error` trait to implement,
        // so the description is exposed as an inherent method instead.
        #[cfg(feature="no_std")]
        impl $name {
            #[allow(missing_docs)]
            pub fn description(&self) -> &'static str {
                match *self {
                    $($name::$variant => $text),*
                }
            }
        }

        #[cfg(not(feature="no_std"))]
        impl Error for $name {
            fn description(&self) -> &'static str {
                match *self {
                    $($name::$variant => $text),*
                }
            }
        }

        impl Display for $name {
            fn fmt(&self, out: &mut Formatter) -> fmt::Result {
                // The description is the whole message.
                out.write_str(self.description())
            }
        }
    };
}


simple!{/// Reasons why a `u32` is not a valid Unicode codepoint.
    InvalidCodepoint {
        /// It's reserved for UTF-16 surrogate pairs.
        ::Utf16Reserved => "is reserved for UTF-16 surrogate pairs",
        /// It's higher than the highest codepoint (which is 0x10ffff).
        ::TooHigh => "is higher than the highest codepoint",
    }}
use self::InvalidCodepoint::*;
impl InvalidCodepoint {
    /// Get the range of values for which this error would be given.
    ///
    /// Both ends of the returned `(first, last)` tuple are inclusive.
    pub fn error_range(self) -> (u32,u32) {match self {
        Utf16Reserved => (0xd8_00, 0xdf_ff),
        // 0x10_ff_ff is itself the highest valid codepoint,
        // so the error only applies from the value after it.
        TooHigh => (0x00_11_00_00, 0xff_ff_ff_ff),
    }}
}


simple!{/// Reasons why one or two `u16`s are not valid UTF-16,
        /// listed in descending order of precedence.
    InvalidUtf16Tuple {
        /// The first unit is a trailing/low surrogate, which is never valid.
        ///
        /// Note that the value of a low surrogate is actually higher than a high surrogate.
        ::FirstIsTrailingSurrogate => "the first unit is a trailing / low surrogate, which is never valid",
        /// You provided a second unit, but the first one stands on its own.
        ::SuperfluousSecond => "the second unit is superfluous",
        /// The first and only unit requires a second unit.
        ::MissingSecond => "the first unit requires a second unit",
        /// The first unit requires a second unit, but it's not a trailing/low surrogate.
        ///
        /// Note that the value of a low surrogate is actually higher than a high surrogate.
        ::InvalidSecond => "the required second unit is not a trailing / low surrogate",
    }}

simple!{/// Reasons why a slice of `u16`s doesn't start with valid UTF-16.
    InvalidUtf16Slice {
        /// The slice is empty.
        ::EmptySlice => "the slice is empty",
        /// The first unit is a low (trailing) surrogate.
        ::FirstLowSurrogate => "the first unit is a low surrogate",
        /// The first and only unit requires a second unit.
        ::MissingSecond => "the first and only unit requires a second one",
        /// The first unit requires a second one, but the second unit is not a low surrogate.
        ::SecondNotLowSurrogate => "the required second unit is not a low surrogate",
    }}


simple!{/// Reasons why `Utf8Char::from_str()` failed.
    FromStrError {
        /// The string has more than one codepoint;
        /// `Utf8Char` cannot store more than a single codepoint.
        ::MultipleCodepoints => "has more than one codepoint",
        /// The string is empty; `Utf8Char` cannot be empty.
        ::Empty => "is empty",
    }}

simple!{/// Reasons why a byte is not the start of a UTF-8 codepoint.
    InvalidUtf8FirstByte {
        /// Sequences cannot be longer than 4 bytes. Is given for values >= 240.
        ::TooLongSeqence => "is greater than 239 (UTF-8 seqences cannot be longer than four bytes)",
        /// This byte belongs to a previous sequence. Is given for values between 128 and 192 (exclusive).
        ::ContinuationByte => "is a continuation of a previous sequence",
    }}
use self::InvalidUtf8FirstByte::*;



macro_rules! complex {
($err:ty
 {$($sub:ty => $to:expr,)*}
 {$($desc:pat => $string:expr),+,}
 => $use_cause:expr =>
 {$($cause:pat => $result:expr),+,} $(#[$causedoc:meta])*
) => {
    $(impl From<$sub> for $err {
          fn from(error: $sub) -> $err {
              $to(error)
          }
      })*
    #[cfg(feature="no_std")]
    impl $err {
        #[allow(missing_docs)]
        pub fn description(&self) -> &'static str {
            match *self{ $($desc => $string,)* }
        }
        /// A hack to avoid two Display impls
        fn cause(&self) -> Option<&Display> {None}
    }
    #[cfg(not(feature="no_std"))]
    impl Error for $err {
        fn description(&self) -> &'static str {
            match *self{ $($desc => $string,)* }
        }
        $(#[$causedoc])*
        fn cause(&self) -> Option<&Error> {
            match *self{ $($cause => $result,)* }
        }
    }
    impl Display for $err {
        fn fmt(&self,  fmtr: &mut Formatter) -> fmt::Result {
            match (self.cause(), $use_cause) {
                (Some(d),true) => write!(fmtr, "{}: {}", self.description(), d),
                        _      => write!(fmtr, "{}", self.description()),
            }
        }
    }
}}


/// Reasons why a byte sequence is not valid UTF-8, excluding invalid codepoint.
/// Listed in descending order of precedence.
#[derive(Clone,Copy, Debug, PartialEq,Eq)]
pub enum InvalidUtf8 {
    /// Something is wrong with the first byte.
    FirstByte(InvalidUtf8FirstByte),
    /// The byte at index 1, 2 or 3 should be a continuation byte,
    /// but doesn't fit the pattern 0b10xx_xxxx.
    NotAContinuationByte(usize),
    /// There are too many leading zeros: it could be a byte shorter.
    ///
    /// [Decoding this could allow someone to input otherwise prohibited
    /// characters and sequences, such as "../"](https://tools.ietf.org/html/rfc3629#section-10).
    OverLong,
}
use self::InvalidUtf8::*;
// Error boilerplate for `InvalidUtf8`: conversion from the wrapped first-byte
// error, the description of every variant, and the cause.
complex!{InvalidUtf8 {
        InvalidUtf8FirstByte => FirstByte,
    } {
        FirstByte(TooLongSeqence) => "the first byte is greater than 239 (UTF-8 seqences cannot be longer than four bytes)",
        FirstByte(ContinuationByte) => "the first byte is a continuation of a previous sequence",
        OverLong => "the seqence contains too many zeros and could be shorter",
        NotAContinuationByte(_) => "the sequence is too short",
    // `false`: `Display` prints only the description, because the `FirstByte`
    // descriptions above already spell out what the cause would say.
    } => false => {
        FirstByte(ref cause) => Some(cause),
        _ => None,
    }/// Returns `Some` if the error is an `InvalidUtf8FirstByte`.
}


/// Reasons why a byte array is not valid UTF-8, in descending order of precedence.
#[derive(Clone,Copy, Debug, PartialEq,Eq)]
pub enum InvalidUtf8Array {
    /// Not a valid UTF-8 sequence.
    Utf8(InvalidUtf8),
    /// Not a valid Unicode codepoint.
    Codepoint(InvalidCodepoint),
}
// Error boilerplate for `InvalidUtf8Array`: conversions from both wrapped
// error types, the description of every variant, and the cause.
complex!{InvalidUtf8Array {
        InvalidUtf8 => InvalidUtf8Array::Utf8,
        InvalidCodepoint => InvalidUtf8Array::Codepoint,
    } {
        InvalidUtf8Array::Utf8(_) => "the seqence is invalid UTF-8",
        InvalidUtf8Array::Codepoint(_) => "the encoded codepoint is invalid",
    // `true`: `Display` appends the wrapped error after the description.
    } => true => {
        InvalidUtf8Array::Utf8(ref u) => Some(u),
        InvalidUtf8Array::Codepoint(ref c) => Some(c),
    }/// Always returns `Some`.
}


/// Reasons why a byte slice is not valid UTF-8, in descending order of precedence.
#[derive(Clone,Copy, Debug, PartialEq,Eq)]
pub enum InvalidUtf8Slice {
    /// The bytes are not a valid UTF-8 sequence.
    Utf8(InvalidUtf8),
    /// The encoded codepoint is invalid.
    Codepoint(InvalidCodepoint),
    /// The slice is too short; the contained value is the number of bytes
    /// that were required. A value of 0 is described as an empty slice.
    TooShort(usize),
}
// Error boilerplate for `InvalidUtf8Slice`: conversions from both wrapped
// error types, the description of every variant, and the cause.
complex!{InvalidUtf8Slice {
        InvalidUtf8 => InvalidUtf8Slice::Utf8,
        InvalidCodepoint => InvalidUtf8Slice::Codepoint,
    } {
        InvalidUtf8Slice::Utf8(_) => "the seqence is invalid UTF-8",
        InvalidUtf8Slice::Codepoint(_) => "the encoded codepoint is invalid",
        // Must stay before the catch-all `TooShort(_)` arm to take effect.
        InvalidUtf8Slice::TooShort(0) => "the slice is empty",
        InvalidUtf8Slice::TooShort(_) => "the slice is shorter than the seqence",
    // `true`: `Display` appends the wrapped error (if any) after the description.
    } => true => {
        InvalidUtf8Slice::Utf8(ref u) => Some(u),
        InvalidUtf8Slice::Codepoint(ref c) => Some(c),
        InvalidUtf8Slice::TooShort(_) => None,
    }/// Returns `Some` unless the error is `TooShort`.
}