// stringprep 0.1.2
//
// An implementation of the stringprep algorithm.
// Documentation: https://docs.rs/stringprep/0.1.2
//! An implementation of the "stringprep" algorithm defined in [RFC 3454][].
//!
//! [RFC 3454]: https://tools.ietf.org/html/rfc3454
#![doc(html_root_url="https://docs.rs/stringprep/0.1.2")]
#![warn(missing_docs)]
extern crate unicode_bidi;
extern crate unicode_normalization;

use std::ascii::AsciiExt;
use std::borrow::Cow;
use std::error;
use std::fmt;
use unicode_normalization::UnicodeNormalization;

mod rfc3454;
pub mod tables;

/// Describes why a string failed stringprep normalization.
///
/// This type is private; it is only exposed to callers wrapped inside `Error`.
#[derive(Debug)]
enum ErrorCause {
    /// Contains stringprep prohibited characters.
    ///
    /// Carries the offending character. The profile functions in this file also use this
    /// variant to report unassigned code points.
    ProhibitedCharacter(char),
    /// Violates stringprep rules for bidirectional text.
    ProhibitedBidirectionalText,
}

/// An error performing the stringprep algorithm.
///
/// Returned by the profile functions in this crate (`saslprep`, `nameprep`, `nodeprep` and
/// `resourceprep`). The wrapped cause is private; use the `Display` implementation to obtain
/// a human-readable reason.
#[derive(Debug)]
pub struct Error(ErrorCause);

impl fmt::Display for Error {
    fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
        match self.0 {
            ErrorCause::ProhibitedCharacter(c) => write!(fmt, "prohibited character `{}`", c),
            ErrorCause::ProhibitedBidirectionalText => write!(fmt, "prohibited bidirectional text"),
        }
    }
}

impl error::Error for Error {
    /// Returns a fixed, cause-independent summary; the specific reason is only available
    /// through the `Display` implementation.
    fn description(&self) -> &str {
        "error performing stringprep algorithm"
    }
}

/// Prepares a string with the SASLprep profile of the stringprep algorithm.
///
/// SASLprep is defined in [RFC 4013][].
///
/// [RFC 4013]: https://tools.ietf.org/html/rfc4013
pub fn saslprep<'a>(s: &'a str) -> Result<Cow<'a, str>, Error> {
    // fast path for ascii text
    if s.chars()
           .all(|c| c.is_ascii() && !tables::ascii_control_character(c)) {
        return Ok(Cow::Borrowed(s));
    }

    // 2.1 Mapping
    let mapped = s.chars()
        .map(|c| if tables::non_ascii_space_character(c) {
                 ' '
             } else {
                 c
             })
        .filter(|&c| !tables::commonly_mapped_to_nothing(c));

    // 2.2 Normalization
    let normalized = mapped.nfkc().collect::<String>();

    // 2.3 Prohibited Output
    let prohibited = normalized
        .chars()
        .find(|&c| {
            tables::non_ascii_space_character(c) /* C.1.2 */ ||
            tables::ascii_control_character(c) /* C.2.1 */ ||
            tables::non_ascii_control_character(c) /* C.2.2 */ ||
            tables::private_use(c) /* C.3 */ ||
            tables::non_character_code_point(c) /* C.4 */ ||
            tables::surrogate_code(c) /* C.5 */ ||
            tables::inappropriate_for_plain_text(c) /* C.6 */ ||
            tables::inappropriate_for_canonical_representation(c) /* C.7 */ ||
            tables::change_display_properties_or_deprecated(c) /* C.8 */ ||
            tables::tagging_character(c) /* C.9 */
        });
    if let Some(c) = prohibited {
        return Err(Error(ErrorCause::ProhibitedCharacter(c)));
    }

    // 2.4. Bidirectional Characters
    if is_prohibited_bidirectional_text(&normalized) {
        return Err(Error(ErrorCause::ProhibitedBidirectionalText));
    }

    // 2.5 Unassigned Code Points
    let unassigned = normalized
        .chars()
        .find(|&c| tables::unassigned_code_point(c));
    if let Some(c) = unassigned {
        return Err(Error(ErrorCause::ProhibitedCharacter(c)));
    }

    Ok(Cow::Owned(normalized))
}

// RFC3454, 6. Bidirectional Characters
//
// Returns `true` when `s` breaks either of the requirements checked below.
fn is_prohibited_bidirectional_text(s: &str) -> bool {
    // Both requirements only apply to strings holding at least one RandALCat character.
    if !s.chars().any(tables::bidi_r_or_al) {
        return false;
    }

    // 2) If a string contains any RandALCat character, the string
    // MUST NOT contain any LCat character.
    if s.chars().any(tables::bidi_l) {
        return true;
    }

    // 3) If a string contains any RandALCat character, a RandALCat
    // character MUST be the first character of the string, and a
    // RandALCat character MUST be the last character of the string.
    //
    // The unwraps cannot fail: the guard above proved the string is non-empty.
    let head = s.chars().next().unwrap();
    let tail = s.chars().next_back().unwrap();
    !(tables::bidi_r_or_al(head) && tables::bidi_r_or_al(tail))
}

/// Prepares a string with the Nameprep profile of the stringprep algorithm.
///
/// Nameprep is defined in [RFC 3491][].
///
/// [RFC 3491]: https://tools.ietf.org/html/rfc3491
pub fn nameprep<'a>(s: &'a str) -> Result<Cow<'a, str>, Error> {
    // 3. Mapping
    let mapped = s.chars()
        .filter(|&c| !tables::commonly_mapped_to_nothing(c))
        .flat_map(tables::case_fold_for_nfkc);

    // 4. Normalization
    let normalized = mapped.nfkc().collect::<String>();

    // 5. Prohibited Output
    let prohibited = normalized
        .chars()
        .find(|&c| {
            tables::non_ascii_space_character(c) /* C.1.2 */ ||
            tables::non_ascii_control_character(c) /* C.2.2 */ ||
            tables::private_use(c) /* C.3 */ ||
            tables::non_character_code_point(c) /* C.4 */ ||
            tables::surrogate_code(c) /* C.5 */ ||
            tables::inappropriate_for_plain_text(c) /* C.6 */ ||
            tables::inappropriate_for_canonical_representation(c) /* C.7 */ ||
            tables::change_display_properties_or_deprecated(c) /* C.9 */ ||
            tables::tagging_character(c) /* C.9 */
        });
    if let Some(c) = prohibited {
        return Err(Error(ErrorCause::ProhibitedCharacter(c)));
    }

    // 6. Bidirectional Characters
    if is_prohibited_bidirectional_text(&normalized) {
        return Err(Error(ErrorCause::ProhibitedBidirectionalText));
    }

    // 7 Unassigned Code Points
    let unassigned = normalized
        .chars()
        .find(|&c| tables::unassigned_code_point(c));
    if let Some(c) = unassigned {
        return Err(Error(ErrorCause::ProhibitedCharacter(c)));
    }

    Ok(Cow::Owned(normalized))
}

/// Prepares a string with the Nodeprep profile of the stringprep algorithm.
///
/// Nameprep is defined in [RFC 3920, Appendix A][].
///
/// [RFC 3920, Appendix A]: https://tools.ietf.org/html/rfc3920#appendix-A
pub fn nodeprep<'a>(s: &'a str) -> Result<Cow<'a, str>, Error> {
    // A.3. Mapping
    let mapped = s.chars()
        .filter(|&c| !tables::commonly_mapped_to_nothing(c))
        .flat_map(tables::case_fold_for_nfkc);

    // A.4. Normalization
    let normalized = mapped.nfkc().collect::<String>();

    // A.5. Prohibited Output
    let prohibited = normalized
        .chars()
        .find(|&c| {
            tables::ascii_space_character(c) /* C.1.1 */ ||
            tables::non_ascii_space_character(c) /* C.1.2 */ ||
            tables::ascii_control_character(c) /* C.2.1 */ ||
            tables::non_ascii_control_character(c) /* C.2.2 */ ||
            tables::private_use(c) /* C.3 */ ||
            tables::non_character_code_point(c) /* C.4 */ ||
            tables::surrogate_code(c) /* C.5 */ ||
            tables::inappropriate_for_plain_text(c) /* C.6 */ ||
            tables::inappropriate_for_canonical_representation(c) /* C.7 */ ||
            tables::change_display_properties_or_deprecated(c) /* C.9 */ ||
            tables::tagging_character(c) /* C.9 */ ||
            prohibited_node_character(c)
        });
    if let Some(c) = prohibited {
        return Err(Error(ErrorCause::ProhibitedCharacter(c)));
    }

    // A.6. Bidirectional Characters
    if is_prohibited_bidirectional_text(&normalized) {
        return Err(Error(ErrorCause::ProhibitedBidirectionalText));
    }

    let unassigned = normalized
        .chars()
        .find(|&c| tables::unassigned_code_point(c));
    if let Some(c) = unassigned {
        return Err(Error(ErrorCause::ProhibitedCharacter(c)));
    }

    Ok(Cow::Owned(normalized))
}

// Additional characters not allowed in JID nodes, by RFC3920.
//
// Returns `true` when `c` is one of those characters.
fn prohibited_node_character(c: char) -> bool {
    const FORBIDDEN: [char; 8] = ['"', '&', '\'', '/', ':', '<', '>', '@'];
    FORBIDDEN.contains(&c)
}

/// Prepares a string with the Resourceprep profile of the stringprep algorithm.
///
/// Nameprep is defined in [RFC 3920, Appendix B][].
///
/// [RFC 3920, Appendix B]: https://tools.ietf.org/html/rfc3920#appendix-B
pub fn resourceprep<'a>(s: &'a str) -> Result<Cow<'a, str>, Error> {
    // B.3. Mapping
    let mapped = s.chars()
        .filter(|&c| !tables::commonly_mapped_to_nothing(c))
        .collect::<String>();

    // B.4. Normalization
    let normalized = mapped.nfkc().collect::<String>();

    // B.5. Prohibited Output
    let prohibited = normalized
        .chars()
        .find(|&c| {
            tables::non_ascii_space_character(c) /* C.1.2 */ ||
            tables::ascii_control_character(c) /* C.2.1 */ ||
            tables::non_ascii_control_character(c) /* C.2.2 */ ||
            tables::private_use(c) /* C.3 */ ||
            tables::non_character_code_point(c) /* C.4 */ ||
            tables::surrogate_code(c) /* C.5 */ ||
            tables::inappropriate_for_plain_text(c) /* C.6 */ ||
            tables::inappropriate_for_canonical_representation(c) /* C.7 */ ||
            tables::change_display_properties_or_deprecated(c) /* C.9 */ ||
            tables::tagging_character(c) /* C.9 */
        });
    if let Some(c) = prohibited {
        return Err(Error(ErrorCause::ProhibitedCharacter(c)));
    }

    // B.6. Bidirectional Characters
    if is_prohibited_bidirectional_text(&normalized) {
        return Err(Error(ErrorCause::ProhibitedBidirectionalText));
    }

    let unassigned = normalized
        .chars()
        .find(|&c| tables::unassigned_code_point(c));
    if let Some(c) = unassigned {
        return Err(Error(ErrorCause::ProhibitedCharacter(c)));
    }

    Ok(Cow::Owned(normalized))
}

#[cfg(test)]
mod test {
    use super::*;

    // Asserts that `result` is a prohibited-character error, reporting what was actually
    // returned when it is not.
    fn assert_prohibited_character<T>(result: Result<T, Error>) {
        match result {
            Err(Error(ErrorCause::ProhibitedCharacter(_))) => (),
            Err(other) => panic!("expected a prohibited character error, got {:?}", other),
            Ok(_) => panic!("expected a prohibited character error, got Ok"),
        }
    }

    // RFC4013, 3. Examples
    #[test]
    fn saslprep_examples() {
        assert_prohibited_character(saslprep("\u{0007}"));
    }

    #[test]
    fn nodeprep_examples() {
        // ASCII space, non-ASCII space, and an additional prohibited node character.
        assert_prohibited_character(nodeprep(" "));
        assert_prohibited_character(nodeprep("\u{00a0}"));
        assert_prohibited_character(nodeprep("foo@bar"));
    }

    #[test]
    fn resourceprep_examples() {
        // '@' is only prohibited in nodes, not in resources.
        assert_eq!("foo@bar", resourceprep("foo@bar").unwrap());
    }
}