use std::borrow::Cow;
use std::default::Default;
use std::error;
use std::fmt;
use std::io;
use byteorder::{BigEndian, ByteOrder, LittleEndian};
use encoding::label::encoding_from_whatwg_label;
use encoding::types::DecoderTrap::Strict;
use encoding::types::EncodingRef;
use crate::plurals::{Ast, Resolver};
use crate::{Catalog, Message};
use crate::metadata::parse_metadata;
// Encoding that `parse_catalog` starts with when `ParseOptions::force_encoding` was not set;
// a charset named in the catalog metadata may still replace it during parsing.
// The lint is silenced because the static keeps its lowercase name.
#[allow(non_upper_case_globals)]
static utf8_encoding: EncodingRef = &encoding::codec::utf_8::UTF8Encoding;
/// Errors that catalog parsing can report.
#[derive(Debug)]
pub enum Error {
/// The first four bytes of the input are not a recognized magic number
/// (in either byte order).
BadMagic,
/// A string could not be decoded with the selected encoding
/// ("invalid byte sequence in a string").
DecodingError,
/// The input is shorter than the header or the string tables say it should be,
/// or a string is missing its terminating NUL byte.
Eof,
/// An I/O error occurred while reading the input.
Io(io::Error),
/// The metadata entry has a syntax error.
// NOTE(review): not raised in this file's visible code; presumably produced by
// `parse_metadata` — confirm.
MalformedMetadata,
/// An entry with an empty message id (the metadata entry) appeared somewhere other
/// than at the first position of the catalog.
MisplacedMetadata,
/// The plural expression is invalid.
// NOTE(review): not raised directly in this file's visible code; presumably produced by
// `Ast::parse` — confirm.
PluralParsing,
/// The charset named in the metadata is not a known encoding label.
UnknownEncoding,
}
use crate::Error::*;
impl error::Error for Error {
    /// Returns a short, static, human-readable description of the error.
    ///
    /// For [`Error::Io`] the description of the wrapped I/O error is returned.
    // `io::Error::description` is deprecated, but it is the only way to hand out a `&str`
    // for the wrapped error, so the lint is silenced for this method only.
    #[allow(deprecated)]
    fn description(&self) -> &str {
        match *self {
            BadMagic => "bad magic number",
            DecodingError => "invalid byte sequence in a string",
            Eof => "unexpected end of file",
            Io(ref err) => err.description(),
            MalformedMetadata => "metadata syntax error",
            MisplacedMetadata => "misplaced metadata",
            UnknownEncoding => "unknown encoding specified",
            PluralParsing => "invalid plural expression",
        }
    }

    /// Returns the wrapped I/O error for [`Error::Io`]; every other variant has no
    /// underlying cause.
    fn source(&self) -> Option<&(dyn error::Error + 'static)> {
        match *self {
            Io(ref err) => Some(err),
            _ => None,
        }
    }
}
impl fmt::Display for Error {
fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
let self_err: &error::Error = self;
write!(fmt, "{}", self_err.description())
}
}
impl From<io::Error> for Error {
fn from(inner: io::Error) -> Error {
Io(inner)
}
}
impl From<Cow<'static, str>> for Error {
fn from(_: Cow<'static, str>) -> Error {
DecodingError
}
}
/// Options that adjust how a catalog is parsed.
///
/// Build a value with `ParseOptions::new()`, chain the `force_*` setters as needed and
/// finish with `parse`.
// NOTE(review): `Debug` is not derived; presumably because `EncodingRef` does not
// implement it — confirm before removing the `allow`.
#[allow(missing_debug_implementations)]
#[derive(Default)]
pub struct ParseOptions {
// When set, used for decoding every string; the charset from the catalog metadata is
// then ignored.
force_encoding: Option<EncodingRef>,
// When set, installed as the catalog's plural resolver; the plural forms from the
// catalog metadata are then ignored.
force_plural: Option<fn(u64) -> usize>,
}
impl ParseOptions {
pub fn new() -> Self {
Default::default()
}
pub fn parse<R: io::Read>(self, reader: R) -> Result<Catalog, Error> {
parse_catalog(reader, self)
}
pub fn force_encoding(mut self, encoding: EncodingRef) -> Self {
self.force_encoding = Some(encoding);
self
}
pub fn force_plural(mut self, plural: fn(u64) -> usize) -> Self {
self.force_plural = Some(plural);
self
}
}
/// Picks the `u32` reader that matches the byte order announced by `magic`.
///
/// Returns the little-endian reader for the bytes `de 12 04 95`, the big-endian reader for
/// `95 04 12 de`, and `None` for any other slice (including slices that are not exactly
/// four bytes long).
fn get_read_u32_fn(magic: &[u8]) -> Option<fn(&[u8]) -> u32> {
    let reader: fn(&[u8]) -> u32 = match magic {
        [0xde, 0x12, 0x04, 0x95] => LittleEndian::read_u32,
        [0x95, 0x04, 0x12, 0xde] => BigEndian::read_u32,
        _ => return None,
    };
    Some(reader)
}
pub fn parse_catalog<'a, R: io::Read>(mut file: R, opts: ParseOptions) -> Result<Catalog, Error> {
let mut contents = vec![];
let n = file.read_to_end(&mut contents)?;
if n < 28 {
return Err(Eof);
}
let read_u32 = get_read_u32_fn(&contents[0..4]).ok_or(BadMagic)?;
let num_strings = read_u32(&contents[8..12]) as usize;
let mut off_otable = read_u32(&contents[12..16]) as usize;
let mut off_ttable = read_u32(&contents[16..20]) as usize;
if n < off_otable || n < off_ttable {
return Err(Eof);
}
let mut catalog = Catalog::new();
if let Some(f) = opts.force_plural {
catalog.resolver = Resolver::Function(f);
}
let mut encoding = opts.force_encoding.unwrap_or(utf8_encoding);
for i in 0..num_strings {
if n < off_otable + 8 {
return Err(Eof);
}
let len = read_u32(&contents[off_otable..off_otable + 4]) as usize;
let off = read_u32(&contents[off_otable + 4..off_otable + 8]) as usize;
if n < off + len + 1 {
return Err(Eof);
}
let mut original = &contents[off..off + len + 1];
let context = match original.iter().position(|x| *x == 4) {
Some(idx) => {
let ctx = &original[..idx];
original = &original[idx + 1..];
Some(encoding.decode(ctx, Strict)?)
}
None => None,
};
let id = match original
.iter()
.position(|x| *x == 0)
.map(|i| &original[..i])
{
Some(b) => encoding.decode(b, Strict)?,
None => return Err(Eof),
};
if id == "" && i != 0 {
return Err(MisplacedMetadata);
}
if n < off_ttable + 8 {
return Err(Eof);
}
let len = read_u32(&contents[off_ttable..off_ttable + 4]) as usize;
let off = read_u32(&contents[off_ttable + 4..off_ttable + 8]) as usize;
if n < off + len + 1 {
return Err(Eof);
}
let translated = contents[off..off + len]
.split(|x| *x == 0)
.map(|b| encoding.decode(b, Strict))
.collect::<Result<Vec<_>, _>>()?;
if id == "" {
let map = parse_metadata(&*translated[0])?;
if let (Some(c), None) = (map.charset(), opts.force_encoding) {
encoding = encoding_from_whatwg_label(c).ok_or(UnknownEncoding)?;
}
if opts.force_plural.is_none() {
if let Some(p) = map.plural_forms().1 {
catalog.resolver = Ast::parse(p).map(Resolver::Expr)?;
}
}
}
catalog.insert(Message::new(id, context, translated));
off_otable += 8;
off_ttable += 8;
}
Ok(catalog)
}
/// Plural-form selector with two forms: returns index `0` when `n` is exactly 1 and
/// index `1` for every other value (including 0).
pub fn default_resolver(n: u64) -> usize {
    match n {
        1 => 0,
        _ => 1,
    }
}
// Checks byte-order detection by observing how the returned reader decodes an asymmetric
// byte pattern. This avoids comparing function addresses (which are not guaranteed to be
// unique or stable) and needs no `unsafe`.
#[test]
fn test_get_read_u32_fn() {
    // Anything that is not exactly one of the two 4-byte magic sequences is rejected.
    assert!(get_read_u32_fn(&[]).is_none());
    assert!(get_read_u32_fn(&[0xde, 0x12, 0x04, 0x95, 0x00]).is_none());

    let sample = [0x01, 0x02, 0x03, 0x04];

    let little = get_read_u32_fn(&[0xde, 0x12, 0x04, 0x95]).unwrap();
    assert_eq!(little(&sample), 0x0403_0201);

    let big = get_read_u32_fn(&[0x95, 0x04, 0x12, 0xde]).unwrap();
    assert_eq!(big(&sample), 0x0102_0304);
}
// Exercises `parse_catalog` on synthetic headers and on the fixture catalogs.
#[test]
fn test_parse_catalog() {
    // Asserts that `$value` is the unit variant `$variant`.
    macro_rules! assert_variant {
        ($value:expr, $variant:path) => {
            match $value {
                $variant => (),
                _ => panic!("Expected {:?}, got {:?}", $variant, $value),
            }
        };
    }

    // Returns `head` followed by 24 zero bytes.
    fn padded(head: &[u8]) -> Vec<u8> {
        let mut bytes = head.to_vec();
        bytes.extend_from_slice(&[0u8; 24]);
        bytes
    }

    // Parses `bytes` with default options.
    fn parse(bytes: &[u8]) -> Result<Catalog, Error> {
        parse_catalog(bytes, ParseOptions::new())
    }

    // 27 bytes in total: one byte short of the minimum size.
    let err = parse(&padded(&[1, 2, 3])).unwrap_err();
    assert_variant!(err, Eof);

    // Long enough, but the first four bytes are not a magic number.
    let err = parse(&padded(&[1, 2, 3, 4])).unwrap_err();
    assert_variant!(err, BadMagic);

    // Either magic byte order followed by zeros parses as a catalog.
    assert!(parse(&padded(&[0x95, 0x04, 0x12, 0xde])).is_ok());
    assert!(parse(&padded(&[0xde, 0x12, 0x04, 0x95])).is_ok());

    // A single message that carries a context.
    let with_context: &[u8] = include_bytes!("../test_cases/1.mo");
    let catalog = parse(with_context).unwrap();
    assert_eq!(catalog.strings.len(), 1);
    assert_eq!(
        catalog.strings["this is context\x04Text"],
        Message::new("Text", Some("this is context"), vec!["Tekstas", "Tekstai"])
    );

    // Two entries, one of them a message without context.
    let without_context: &[u8] = include_bytes!("../test_cases/2.mo");
    let catalog = parse(without_context).unwrap();
    assert_eq!(catalog.strings.len(), 2);
    assert_eq!(
        catalog.strings["Image"],
        Message::new("Image", None, vec!["Nuotrauka", "Nuotraukos"])
    );

    // Strings that are not valid in the active encoding are rejected.
    let invalid: &[u8] = include_bytes!("../test_cases/invalid_utf8.mo");
    let err = parse(invalid).unwrap_err();
    assert_variant!(err, DecodingError);
}