1use std::default::Default;
2use std::io;
3
4use byteorder::{BigEndian, ByteOrder, LittleEndian};
5use encoding::label::encoding_from_whatwg_label;
6use encoding::types::DecoderTrap::Strict;
7use encoding::types::EncodingRef;
8
9use crate::metadata::parse_metadata;
10use crate::plurals::{Ast, Resolver};
11use crate::Error::{self, *};
12use crate::{Catalog, Message};
13
// Encoding that `parse_catalog` falls back to when the caller's options do not
// force one. The identifier is lower-case, so the naming lint for statics is
// silenced explicitly rather than renaming the item.
#[allow(non_upper_case_globals)]
static utf8_encoding: EncodingRef = &encoding::codec::utf_8::UTF8Encoding;
16
17#[allow(missing_debug_implementations)]
28#[derive(Default)]
29pub struct ParseOptions {
30 force_encoding: Option<EncodingRef>,
31 force_plural: Option<fn(u64) -> usize>,
32}
33
34impl ParseOptions {
35 pub fn new() -> Self {
37 Default::default()
38 }
39
40 pub fn parse<R: io::Read>(self, reader: R) -> Result<Catalog, Error> {
42 parse_catalog(reader, self)
43 }
44
45 pub fn force_encoding(mut self, encoding: EncodingRef) -> Self {
51 self.force_encoding = Some(encoding);
52 self
53 }
54
55 pub fn force_plural(mut self, plural: fn(u64) -> usize) -> Self {
61 self.force_plural = Some(plural);
62 self
63 }
64}
65
66fn get_read_u32_fn(magic: &[u8]) -> Option<fn(&[u8]) -> u32> {
69 if magic == [0xde, 0x12, 0x04, 0x95] {
70 Some(LittleEndian::read_u32)
71 } else if magic == [0x95, 0x04, 0x12, 0xde] {
72 Some(BigEndian::read_u32)
73 } else {
74 None
75 }
76}
77
78pub fn parse_catalog<R: io::Read>(mut file: R, opts: ParseOptions) -> Result<Catalog, Error> {
79 let mut contents = vec![];
80 let n = file.read_to_end(&mut contents)?;
81 if n < 28 {
82 return Err(Eof);
83 }
84
85 let read_u32 = get_read_u32_fn(&contents[0..4]).ok_or(BadMagic)?;
86
87 let num_strings = read_u32(&contents[8..12]) as usize;
89 let mut off_otable = read_u32(&contents[12..16]) as usize;
90 let mut off_ttable = read_u32(&contents[16..20]) as usize;
91 if n < off_otable || n < off_ttable {
92 return Err(Eof);
93 }
94
95 let mut catalog = Catalog::new();
96 if let Some(f) = opts.force_plural {
97 catalog.resolver = Resolver::Function(f);
98 }
99 let mut encoding = opts.force_encoding.unwrap_or(utf8_encoding);
100
101 for i in 0..num_strings {
102 if n < off_otable + 8 {
104 return Err(Eof);
105 }
106 let len = read_u32(&contents[off_otable..off_otable + 4]) as usize;
107 let off = read_u32(&contents[off_otable + 4..off_otable + 8]) as usize;
108 if n < off + len + 1 {
110 return Err(Eof);
111 }
112 let mut original = &contents[off..=off + len];
113 let context = match original.iter().position(|x| *x == 4) {
115 Some(idx) => {
116 let ctx = &original[..idx];
117 original = &original[idx + 1..];
118 Some(encoding.decode(ctx, Strict)?)
119 }
120 None => None,
121 };
122 let id = match original
124 .iter()
125 .position(|x| *x == 0)
126 .map(|i| &original[..i])
127 {
128 Some(b) => encoding.decode(b, Strict)?,
129 None => return Err(Eof),
130 };
131 if id == "" && i != 0 {
132 return Err(MisplacedMetadata);
133 }
134
135 if n < off_ttable + 8 {
137 return Err(Eof);
138 }
139 let len = read_u32(&contents[off_ttable..off_ttable + 4]) as usize;
140 let off = read_u32(&contents[off_ttable + 4..off_ttable + 8]) as usize;
141 if n < off + len + 1 {
143 return Err(Eof);
144 }
145 let translated = contents[off..off + len]
146 .split(|x| *x == 0)
147 .map(|b| encoding.decode(b, Strict))
148 .collect::<Result<Vec<_>, _>>()?;
149 if id == "" {
150 let map = parse_metadata(&*translated[0])?;
151 if let (Some(c), None) = (map.charset(), opts.force_encoding) {
152 encoding = encoding_from_whatwg_label(c).ok_or(UnknownEncoding)?;
153 }
154 if opts.force_plural.is_none() {
155 if let Some(p) = map.plural_forms().1 {
156 catalog.resolver = Ast::parse(p).map(Resolver::Expr)?;
157 }
158 }
159 }
160
161 catalog.insert(Message::new(id, context, translated));
162
163 off_otable += 8;
164 off_ttable += 8;
165 }
166
167 Ok(catalog)
168}
169
/// Plural-form selector that distinguishes only "exactly one" from everything
/// else.
///
/// Returns index `0` when `n` is `1` and index `1` for every other value,
/// including `0`.
pub fn default_resolver(n: u64) -> usize {
    match n {
        1 => 0,
        _ => 1,
    }
}
184
#[cfg(test)]
mod test {
    use super::*;

    /// Only the two exact four-byte magic sequences yield a reader, and each
    /// one yields the function for the matching byte order.
    #[test]
    fn test_get_read_u32_fn() {
        // Wrong lengths: empty input, and a valid magic with a trailing byte.
        assert!(get_read_u32_fn(&[]).is_none());
        assert!(get_read_u32_fn(&[0xde, 0x12, 0x04, 0x95, 0x00]).is_none());

        let expected_le = LittleEndian::read_u32 as *const ();
        let selected_le = get_read_u32_fn(&[0xde, 0x12, 0x04, 0x95]).unwrap() as *const ();
        assert_eq!(expected_le, selected_le);

        let expected_be = BigEndian::read_u32 as *const ();
        let selected_be = get_read_u32_fn(&[0x95, 0x04, 0x12, 0xde]).unwrap() as *const ();
        assert_eq!(expected_be, selected_be);
    }

    /// Exercises `parse_catalog` on synthetic headers and on fixture files.
    #[test]
    fn test_parse_catalog() {
        macro_rules! assert_variant {
            ($actual:expr, $expected:path) => {
                match $actual {
                    $expected => (),
                    _ => panic!("Expected {:?}, got {:?}", $expected, $actual),
                }
            };
        }

        /// Returns `head` followed by 24 zero bytes.
        fn padded(head: &[u8]) -> Vec<u8> {
            let mut bytes = head.to_vec();
            bytes.extend_from_slice(&[0u8; 24]);
            bytes
        }

        /// Parses `input` with default options.
        fn parse(input: &[u8]) -> Result<Catalog, Error> {
            parse_catalog(input, ParseOptions::new())
        }

        // 27 bytes in total: one byte short of the minimum accepted length.
        let err = parse(&padded(&[1, 2, 3])[..]).unwrap_err();
        assert_variant!(err, Eof);

        // Long enough, but the first four bytes are not a known magic number.
        let err = parse(&padded(&[1, 2, 3, 4])[..]).unwrap_err();
        assert_variant!(err, BadMagic);

        // Either magic followed by an all-zero header declares zero strings.
        assert!(parse(&padded(&[0x95, 0x04, 0x12, 0xde])[..]).is_ok());
        assert!(parse(&padded(&[0xde, 0x12, 0x04, 0x95])[..]).is_ok());

        // Fixture with a single message that carries a context.
        let catalog = parse(include_bytes!("../test_cases/1.mo")).unwrap();
        assert_eq!(catalog.strings.len(), 1);
        assert_eq!(
            catalog.strings["this is context\x04Text"],
            Message::new("Text", Some("this is context"), vec!["Tekstas", "Tekstai"])
        );

        // Fixture with two entries, one of them a message without context.
        let catalog = parse(include_bytes!("../test_cases/2.mo")).unwrap();
        assert_eq!(catalog.strings.len(), 2);
        assert_eq!(
            catalog.strings["Image"],
            Message::new("Image", None, vec!["Nuotrauka", "Nuotraukos"])
        );

        // Fixture whose strings cannot be decoded.
        let err = parse(include_bytes!("../test_cases/invalid_utf8.mo")).unwrap_err();
        assert_variant!(err, DecodingError);
    }
}