// unicode-bom 1.1.3
//
// Unicode byte-order mark detection for files and byte arrays.
//
// Copyright © 2018 Phil Booth
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may
// not use this file except in compliance with the License. You may obtain
// a copy of the License at:
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
// implied. See the License for the specific language governing
// permissions and limitations under the License.

use super::*;

/// Checks the string form returned by `as_ref()` for each `Bom` variant listed below.
#[test]
fn as_ref() {
    // Each variant exercised here, paired with the exact name it must report.
    let expected_names = [
        (Bom::Null, "[not set]"),
        (Bom::Bocu1, "BOCU-1"),
        (Bom::Gb18030, "GB 18030"),
        (Bom::Scsu, "SCSU"),
        (Bom::UtfEbcdic, "UTF-EBCDIC"),
        (Bom::Utf1, "UTF-1"),
        (Bom::Utf7, "UTF-7"),
        (Bom::Utf8, "UTF-8"),
        (Bom::Utf16Be, "UTF-16 (big-endian)"),
        (Bom::Utf16Le, "UTF-16 (little-endian)"),
        (Bom::Utf32Be, "UTF-32 (big-endian)"),
        (Bom::Utf32Le, "UTF-32 (little-endian)"),
    ];

    for (bom, name) in expected_names.iter() {
        assert_eq!(bom.as_ref(), *name);
    }
}

/// Checks that `to_string()` on `Bom::Null` produces the same text as `as_ref()`.
#[test]
fn to_string() {
    let bom = Bom::Null;
    let rendered = bom.to_string();
    let name: &str = bom.as_ref();
    assert_eq!(rendered, name);
}

/// Checks the value `len()` reports for each `Bom` variant listed below.
#[test]
fn len() {
    // Expands to one `assert_eq!` per listed variant, all sharing one expected length.
    macro_rules! assert_len {
        ($expected:expr => $($variant:ident),+) => {
            $(assert_eq!(Bom::$variant.len(), $expected);)+
        };
    }

    assert_len!(0 => Null);
    assert_len!(2 => Utf16Be, Utf16Le);
    assert_len!(3 => Bocu1, Scsu, Utf1, Utf8);
    assert_len!(4 => Gb18030, UtfEbcdic, Utf7, Utf32Be, Utf32Le);
}

/// Checks that the `Default` value of `Bom` is `Bom::Null`.
#[test]
fn default() {
    let fallback: Bom = Default::default();
    assert_eq!(fallback, Bom::Null);
}

/// Asserts that converting the listed bytes into a `Bom` yields `Bom::$bom`.
///
/// Invoke as `assert_bom!([0xef, 0xbb, 0xbf], Utf8)`; the byte list may be empty.
/// Each element is cast to `u8` before the conversion.
macro_rules! assert_bom {
    ([$($byte:expr),*], $bom:ident) => {{
        // Borrow an array literal rather than building a `Vec`: the conversion only
        // needs a `&[u8]`, so no heap allocation is required per assertion.
        let bytes: &[u8] = &[$($byte as u8),*];
        assert_eq!(Bom::from(bytes), Bom::$bom)
    }};
}

/// Exercises BOM detection from byte slices via `assert_bom!`.
///
/// Each group below covers one signature: inputs that are too short or differ in one
/// byte must yield `Null`, while the exact signature (with or without a trailing
/// byte) must yield the matching variant.
#[test]
fn from_slice() {
    // Empty input carries no BOM.
    assert_bom!([], Null);

    // UTF-32 (big-endian): 00 00 FE FF.
    assert_bom!([0, 0, 0xfe], Null);
    assert_bom!([0, 0, 0xfe, 0xfe], Null);
    assert_bom!([0, 0, 0xfe, 0xff], Utf32Be);
    assert_bom!([0, 0, 0xfe, 0xff, 42], Utf32Be);

    // SCSU: 0E FE FF (the last two bytes swapped must not match).
    assert_bom!([0x0e, 0xff], Null);
    assert_bom!([0x0e, 0xff, 0xfe], Null);
    assert_bom!([0x0e, 0xfe, 0xff], Scsu);
    assert_bom!([0x0e, 0xfe, 0xff, 42], Scsu);

    // GB 18030: 84 31 95 33.
    assert_bom!([0x84, 0x31, 0x95], Null);
    assert_bom!([0x84, 0x31, 0x95, 0x32], Null);
    assert_bom!([0x84, 0x31, 0x95, 0x33], Gb18030);
    assert_bom!([0x84, 0x31, 0x95, 0x33, 42], Gb18030);
    assert_bom!([0x84, 0x31, 0x95, 0x34], Null);

    // UTF-7: 2B 2F 76 followed by 38 or 39; the neighbouring values 37 and 3A are rejected.
    assert_bom!([0x2b, 0x2f, 0x76], Null);
    assert_bom!([0x2b, 0x2f, 0x76, 0x37], Null);
    assert_bom!([0x2b, 0x2f, 0x76, 0x38], Utf7);
    assert_bom!([0x2b, 0x2f, 0x76, 0x38, 42], Utf7);
    assert_bom!([0x2b, 0x2f, 0x76, 0x39], Utf7);
    assert_bom!([0x2b, 0x2f, 0x76, 0x39, 42], Utf7);
    assert_bom!([0x2b, 0x2f, 0x76, 0x3a], Null);

    // UTF-7: 2B 2F 76 followed by 2B; 2A and 2C are rejected.
    assert_bom!([0x2b, 0x2f, 0x76, 0x2a], Null);
    assert_bom!([0x2b, 0x2f, 0x76, 0x2b], Utf7);
    assert_bom!([0x2b, 0x2f, 0x76, 0x2b, 42], Utf7);
    assert_bom!([0x2b, 0x2f, 0x76, 0x2c], Null);

    // UTF-7: 2B 2F 76 followed by 2F; 2E and 30 are rejected.
    assert_bom!([0x2b, 0x2f, 0x76, 0x2e], Null);
    assert_bom!([0x2b, 0x2f, 0x76, 0x2f], Utf7);
    assert_bom!([0x2b, 0x2f, 0x76, 0x2f, 42], Utf7);
    assert_bom!([0x2b, 0x2f, 0x76, 0x30], Null);

    // UTF-EBCDIC: DD 73 66 73.
    assert_bom!([0xdd, 0x73, 0x66], Null);
    assert_bom!([0xdd, 0x73, 0x66, 0x72], Null);
    assert_bom!([0xdd, 0x73, 0x66, 0x73], UtfEbcdic);
    assert_bom!([0xdd, 0x73, 0x66, 0x73, 42], UtfEbcdic);
    assert_bom!([0xdd, 0x73, 0x66, 0x74], Null);

    // UTF-8: EF BB BF.
    assert_bom!([0xef, 0xbb], Null);
    assert_bom!([0xef, 0xbb, 0xbe], Null);
    assert_bom!([0xef, 0xbb, 0xbf], Utf8);
    assert_bom!([0xef, 0xbb, 0xbf, 42], Utf8);
    assert_bom!([0xef, 0xbb, 0xc0], Null);

    // UTF-1: F7 64 4C.
    assert_bom!([0xf7, 0x64], Null);
    assert_bom!([0xf7, 0x64, 0x4b], Null);
    assert_bom!([0xf7, 0x64, 0x4c], Utf1);
    assert_bom!([0xf7, 0x64, 0x4c, 42], Utf1);
    assert_bom!([0xf7, 0x64, 0x4d], Null);

    // BOCU-1: FB EE 28.
    assert_bom!([0xfb, 0xee], Null);
    assert_bom!([0xfb, 0xee, 0x27], Null);
    assert_bom!([0xfb, 0xee, 0x28], Bocu1);
    assert_bom!([0xfb, 0xee, 0x28, 42], Bocu1);
    assert_bom!([0xfb, 0xee, 0x29], Null);

    // UTF-16 (big-endian): FE FF.
    assert_bom!([0xfe], Null);
    assert_bom!([0xfe, 0xfe], Null);
    assert_bom!([0xfe, 0xff], Utf16Be);
    assert_bom!([0xfe, 0xff, 42], Utf16Be);

    // UTF-16 (little-endian): FF FE.
    assert_bom!([0xff], Null);
    assert_bom!([0xff, 0xfd], Null);
    assert_bom!([0xff, 0xfe], Utf16Le);
    assert_bom!([0xff, 0xfe, 42], Utf16Le);
    assert_bom!([0xff, 0xff], Null);

    // FF FE is also the start of the UTF-32 (little-endian) signature FF FE 00 00:
    // only the full four bytes select UTF-32; anything else stays UTF-16 (little-endian).
    assert_bom!([0xff, 0xfe, 0], Utf16Le);
    assert_bom!([0xff, 0xfe, 0, 0], Utf32Le);
    assert_bom!([0xff, 0xfe, 0, 0, 42], Utf32Le);
    assert_bom!([0xff, 0xfe, 0, 1], Utf16Le);
    assert_bom!([0xff, 0xfe, 0, 1, 42], Utf16Le);
}

/// Checks BOM detection on fixture files that are opened first and then converted
/// through a `&mut File`.
#[test]
fn from_file() {
    // Opens the fixture at `path` (panicking if that fails) and detects its BOM.
    fn detect(path: &str) -> Bom {
        let mut handle = File::open(path).unwrap();
        Bom::from(&mut handle)
    }

    assert_eq!(detect("fixtures/ascii.txt"), Bom::Null);
    assert_eq!(detect("fixtures/utf16-le.txt"), Bom::Utf16Le);
    assert_eq!(detect("fixtures/utf32-le.txt"), Bom::Utf32Le);
}

/// Checks BOM detection when a fixture path string is parsed directly into a `Bom`.
///
/// Any error produced by parsing is propagated with `?` and fails the test.
#[test]
fn from_path() -> Result<(), Error> {
    assert_eq!("fixtures/ascii.txt".parse::<Bom>()?, Bom::Null);
    assert_eq!("fixtures/utf16-le.txt".parse::<Bom>()?, Bom::Utf16Le);
    assert_eq!("fixtures/utf32-le.txt".parse::<Bom>()?, Bom::Utf32Le);
    Ok(())
}