1use locale_config::LanguageRange;
14use regex::{Regex,Captures};
15use std::collections::{BTreeMap,HashMap};
16use std::io::{BufRead,Lines};
17use std::iter::Peekable;
18use super::{CatalogueReader,Count,Error,Message,Origin,State,Unit};
19
/// One physical line of a PO file after classification by `parse_po_line`.
///
/// Every non-blank variant carries, as its first field, the line number
/// that was passed to `parse_po_line` for it.
#[derive(Clone,Debug)]
enum PoLine {
    /// A `#` comment: (line number, kind, text).
    ///
    /// The kind is the character following `#` when it is one of `:`, `.`
    /// or `,`; otherwise it is a space.
    Comment(usize, char, String),
    /// A keyword line: (line number, prefix, tag, string).
    ///
    /// The prefix is the leading marker (such as `#~` or `#|`, possibly
    /// empty). The tag is the keyword (`msgid`, `msgstr`, ...) with `|`
    /// prepended when the prefix ends with `|`. The string is the quoted
    /// content after escape processing.
    Message(usize, String, String, String),
    /// A bare quoted string without a keyword: (line number, prefix, string).
    Continuation(usize, String, String),
    /// A line that contains nothing but whitespace.
    Blank,
}
31
/// Iterator over the lines of a `BufRead`, yielding each one classified as
/// a `PoLine`.
struct LineIter<R: BufRead> {
    /// Number of lines successfully read so far; after a read this is the
    /// 1-based number of the most recently read line.
    _n: usize,
    /// The underlying line-by-line reader.
    _inner: Lines<R>,
}
36
37lazy_static!{
38 static ref MESSAGE_RE: Regex = Regex::new(
39 r#"^\s*(#~?\|?)?\s*(msgctxt|msgid|msgif_plural|msgstr(?:\[[012345]\])?)?\s*"(.*)"\s*$"#)
40 .unwrap();
41 static ref COMMENT_RE: Regex = Regex::new(
42 r#"^\s*#([:.,]?)\s*(.*)"#).unwrap();
43
44 static ref UNESCAPE_RE: Regex = Regex::new(r#"\\[rtn"\\]"#).unwrap();
45 static ref UNESCAPE_MAP: HashMap<&'static str, &'static str> = [
46 (r"\r", "\r"),
47 (r"\t", "\t"),
48 (r"\n", "\n"),
49 ("\\\"", "\""),
50 (r"\\", r"\"),
51 ].iter().cloned().collect();
52}
53
54fn parse_po_line(line: &str, n: usize) -> Result<PoLine, ()> {
55 if !line.contains(|c: char| !c.is_whitespace()) {
56 return Ok(PoLine::Blank);
57 }
58 if let Some(c) = MESSAGE_RE.captures(line) {
59 if c.get(2).is_some() {
60 return Ok(PoLine::Message(
61 n,
62 c.get(1).map(|x| x.as_str()).unwrap_or("").to_owned(),
63 if c.get(1).map(|x| x.as_str()).unwrap_or("").ends_with('|') {
64 String::from("|") + c.get(2).unwrap().as_str()
65 } else {
66 c.get(2).unwrap().as_str().to_owned()
67 },
68 UNESCAPE_RE.replace(
69 c.get(3).unwrap().as_str(),
70 |d: &Captures| -> String {
71 UNESCAPE_MAP.get(d.get(0).unwrap().as_str()).unwrap().to_string()
72 }).into_owned()));
73 } else {
74 return Ok(PoLine::Continuation(
75 n,
76 c.get(1).map(|x| x.as_str()).unwrap_or("").to_owned(),
77 UNESCAPE_RE.replace(
78 c.get(3).unwrap().as_str(),
79 |d: &Captures| -> String {
80 UNESCAPE_MAP.get(d.get(0).unwrap().as_str()).unwrap().to_string()
81 }).into_owned()));
82 }
83 }
84 if let Some(c) = COMMENT_RE.captures(line) {
85 return Ok(PoLine::Comment(
86 n,
87 c.get(1).unwrap().as_str().chars().next().unwrap_or(' '),
88 c.get(2).unwrap().as_str().to_owned()));
89 }
90 return Err(());
91}
92
93impl<R: BufRead> Iterator for LineIter<R> {
94 type Item = Result<PoLine, Error>;
95 fn next(&mut self) -> Option<Result<PoLine, Error>> {
96 loop {
97 let line = match self._inner.next() {
98 Some(Ok(s)) => s,
99 Some(Err(e)) => return Some(Err(Error::Io(self._n + 1, e))),
100 None => return None,
101 };
102 self._n += 1;
103 match parse_po_line(&line, self._n) {
104 Ok(PoLine::Blank) => (),
105 Ok(p) => return Some(Ok(p)),
106 Err(_) => return Some(Err(Error::Parse(self._n, Some(line), Vec::new()))),
107 }
108 }
109 }
110}
111
112impl<R: BufRead> LineIter<R> {
113 fn new(r: R) -> LineIter<R> {
114 LineIter {
115 _n: 0,
116 _inner: r.lines(),
117 }
118 }
119}
120
/// Parsing helpers layered on top of a peekable stream of `PoLine`s.
trait MsgParser {
    /// Consumes the run of comment lines at the front of the stream and
    /// records their content in `unit`.
    fn parse_comments(&mut self, unit: &mut Unit);
    /// Tries to consume one message entry with keyword `tag`, together with
    /// its continuation lines.
    ///
    /// Returns the concatenated string on success, `Ok(None)` when the next
    /// line is not such an entry, and `Err` when the next line is a pending
    /// error.
    fn parse_msg(&mut self, tag: &str, unit: &mut Unit) -> Result<Option<String>, Error>;
    /// Builds the result to return when one of the items in `exp` was
    /// required but the next line is something else: a parse error that
    /// describes that line, or `Ok(None)` at the end of input.
    fn expected(&mut self, exp: Vec<&'static str>) -> Result<Option<Unit>, Error>;
}
126
127impl<R: BufRead> MsgParser for Peekable<LineIter<R>> {
128 fn parse_comments(&mut self, unit: &mut Unit) {
129 while let Some(&Ok(PoLine::Comment(..))) = self.peek() {
130 match self.next() {
131 Some(Ok(PoLine::Comment(_, ',', s))) => {
132 for flag in s.split(',').map(str::trim) {
133 match flag {
134 "fuzzy" => unit._state = State::NeedsWork,
135 _ => (), }
137 }
138 }
139 Some(Ok(PoLine::Comment(_, ':', s))) => {
140 unit._locations.extend(s.split(char::is_whitespace).filter(|x| !x.is_empty()).map(From::from));
141 }
142 Some(Ok(PoLine::Comment(_, '.', s))) => {
143 unit._notes.push((Origin::Developer, s));
144 }
145 Some(Ok(PoLine::Comment(_, ' ', s))) => {
146 unit._notes.push((Origin::Translator, s));
147 }
148 _ => unreachable!(), }
150 }
151 }
152
153 fn parse_msg(&mut self, tag: &str, unit: &mut Unit) -> Result<Option<String>, Error> {
154 if is!(self.peek() => Some(&Err(_))) {
155 return Err(self.next().unwrap().unwrap_err())
157 }
158
159 let prefix;
160 let mut string;
161
162 if is!(self.peek() =>
163 Some(&Ok(PoLine::Message(_, ref p, ref t, _)))
164 if t == tag && p.starts_with("#~") == unit._obsolete) {
165 if let PoLine::Message(_, p, _, s) = self.next().unwrap().unwrap() {
166 prefix = p;
167 string = s;
168 } else {
169 unreachable!()
170 }
171 } else {
172 return Ok(None); }
174
175 loop {
176 if is!(self.peek() => Some(&Err(_))) {
177 return Err(self.next().unwrap().unwrap_err())
179 }
180
181 if is!(self.peek() =>
182 Some(&Ok(PoLine::Continuation(_, ref p, _)))
183 if *p == prefix) {
184 if let PoLine::Continuation(_, _, s) = self.next().unwrap().unwrap() {
185 string.push_str(&s);
186 } else {
187 unreachable!();
188 }
189 } else {
190 break;
191 }
192 }
193 Ok(Some(string))
194 }
195
196 fn expected(&mut self, exp: Vec<&'static str>) -> Result<Option<Unit>, Error> {
197 match self.peek() {
198 Some(&Ok(PoLine::Message(n, ref p, ..))) =>
199 Err(Error::Parse(n, Some(p.clone()), exp)),
200 Some(&Ok(PoLine::Continuation(n, ..))) =>
201 Err(Error::Parse(n, Some("\"".to_owned()), exp)),
202 Some(&Ok(PoLine::Comment(n, c, ..))) =>
203 Err(Error::Parse(n, Some(format!("#{}", c)), exp)),
204 None =>
205 Ok(None),
206 _ => panic!("Should not happen!"),
207 }
208 }
209}
210
211fn is_header(oru: &Option<Result<Unit, Error>>) -> bool {
212 match oru {
213 &Some(Ok(ref u)) => u.source().is_singular() && u.source().is_blank(),
214 _ => false,
215 }
216}
217
/// Reader that parses a PO catalogue from a `BufRead` and iterates over its
/// translation units.
pub struct PoReader<R: BufRead> {
    /// Classified input lines with one line of look-ahead.
    _lines: Peekable<LineIter<R>>,
    /// The unit (or error) parsed ahead of time; it is what the next call
    /// to `next` returns.
    _next_unit: Option<Result<Unit, Error>>,
    /// Initialised to `None`; not otherwise used by the code in this file.
    _failed: Option<Error>,
    /// Key/value pairs collected from the header entry.
    _header: HashMap<String, String>,
    /// Language taken from the `Language` header field, or the invariant
    /// range when it is absent or cannot be parsed.
    _target_language: LanguageRange<'static>,
    /// Plural categories paired, in order, with `msgstr[0]`, `msgstr[1]`, ...
    /// NOTE(review): starts empty and nothing in this file fills it in —
    /// confirm whether `Plural-Forms` handling lives elsewhere.
    _plurals: Vec<Count>,
}
226
227impl<R: BufRead> PoReader<R> {
228 pub fn new(reader: R) -> Self {
229 let mut res = PoReader {
230 _lines: LineIter::new(reader).peekable(),
231 _next_unit: None,
232 _failed: None,
233 _header: HashMap::new(),
234 _target_language: LanguageRange::invariant(),
235 _plurals: Vec::new(),
236 };
237 res._next_unit = res.next_unit();
238 if is_header(&res._next_unit) {
239 res.parse_po_header();
240 res._next_unit = res.next_unit();
241 }
242 return res;
243 }
244
245 fn make_source(msgid: Option<String>, msgid_plural: Option<String>) -> Message {
246 if msgid.is_none() {
247 Message::Empty
248 } else if msgid_plural.is_none() {
249 Message::Singular(msgid.unwrap())
250 } else {
251 let mut map = BTreeMap::new();
252 map.insert(Count::One, msgid.unwrap());
253 map.insert(Count::Other, msgid_plural.unwrap());
254 Message::Plural(map)
255 }
256 }
257
258 fn parse_unit(&mut self) -> Result<Option<Unit>, Error> {
259 let mut unit = Unit::default();
260
261 self._lines.parse_comments(&mut unit);
262 match self._lines.peek() {
263 None => return Ok(None), Some(&Ok(PoLine::Message(_, ref p, ..))) if p.starts_with("#~") => unit._obsolete = true,
266 _ => (),
267 }
268
269 unit._prev_context = self._lines.parse_msg("|msgctxt", &mut unit)?;
270
271 let prev_msgid = self._lines.parse_msg("|msgid", &mut unit)?;
272 let prev_msgid_pl = if prev_msgid.is_some() {
273 self._lines.parse_msg("|msgid_plural", &mut unit)?
274 } else { None };
275 unit._prev_source = Self::make_source(prev_msgid, prev_msgid_pl);
276
277 unit._context = self._lines.parse_msg("msgctxt", &mut unit)?;
278
279 let msgid = self._lines.parse_msg("msgid", &mut unit)?;
280 if msgid.is_none() {
281 return self._lines.expected(vec!["msgid"]);
282 }
283 let msgid_pl = self._lines.parse_msg("msgid_plural", &mut unit)?;
284 unit._source = Self::make_source(msgid, msgid_pl);
285
286 if unit._source.is_singular() {
287 match self._lines.parse_msg("msgstr", &mut unit)? {
289 None => return self._lines.expected(vec!["msgstr"]),
290 Some(s) => unit._target = Message::Singular(s),
291 }
292 } else {
293 assert!(unit._source.is_plural());
294 const TAGS: &'static [&'static str] =
295 &["msgstr[0]", "msgstr[1]", "msgstr[2]", "msgstr[3]", "msgstr[4]", "msgstr[5]", "msgstr[6]"];
296 let mut map = BTreeMap::new();
297 for (c, t) in self._plurals.iter().zip(TAGS) {
298 match self._lines.parse_msg(t, &mut unit)? {
299 None => return self._lines.expected(vec![t]),
300 Some(s) => { map.insert(*c, s); }
301 }
302 }
303 unit._target = Message::Plural(map);
304 }
305
306 if unit._state == State::Empty && !unit._target.is_blank() {
307 unit._state = State::Final;
309 }
310
311 assert!(!unit._source.is_empty());
312 return Ok(Some(unit));
313 }
314
315 fn next_unit(&mut self) -> Option<Result<Unit, Error>> {
316 match self.parse_unit() {
317 Ok(None) => None,
318 Ok(Some(u)) => Some(Ok(u)),
319 Err(e) => Some(Err(e)),
320 }
321 }
322
323 fn parse_po_header(&mut self) {
324 if let Some(Ok(ref u)) = self._next_unit {
325 for line in u._target.singular().unwrap_or("").split('\n') {
326 if let Some(n) = line.find(':') {
327 let key = line[..n].trim();
328 let val = line[(n+1)..].trim();
329 self._header.insert(key.to_owned(), val.to_owned());
330 }
331 }
332 if let Some(lang) = self._header.get("Language") {
333 self._target_language
334 = LanguageRange::new(lang)
335 .map(LanguageRange::into_static)
336 .or_else(|_| LanguageRange::from_unix(lang))
337 .unwrap_or_else(|_| LanguageRange::invariant());
338 }
339 }
341 }
342}
343
344impl<R: BufRead> Iterator for PoReader<R> {
345 type Item = Result<Unit, Error>;
346 fn next(&mut self) -> Option<Result<Unit, Error>> {
347 if self._next_unit.is_none() {
348 return None;
349 }
350
351 let mut res = self.next_unit();
352 ::std::mem::swap(&mut res, &mut self._next_unit);
353 return res;
354 }
355}
356
impl<R: BufRead> CatalogueReader for PoReader<R> {
    /// Returns the target language determined when the reader was created:
    /// the value of the header's `Language` field, or the invariant range
    /// when there was none or it could not be parsed.
    fn target_language(&self) -> &LanguageRange<'static> {
        &self._target_language
    }
}
362
#[cfg(test)]
mod tests {
    use ::CatalogueReader;
    use ::locale_config::LanguageRange;
    use ::Message::*;
    use ::Origin::*;
    use super::PoReader;

    // Sample catalogue: a header entry followed by a plain translated unit,
    // a fuzzy unit with comments, locations, context and previous values,
    // an untranslated unit and an obsolete unit.
    static SAMPLE_PO: &'static str = r###"
msgid ""
msgstr ""
"Project-Id-Version: translate-storage test\n"
"PO-Revision-Date: 2017-04-24 21:39+02:00\n"
"Last-Translator: Jan Hudec <bulb@ucw.cz>\n"
"Language-Team: Czech\n"
"Language: cs\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=ISO-8859-2\n"
"Content-Transfer-Encoding: 8bit\n"
"Plural-Forms: nplurals=3; plural=(n==1) ? 0 : (n>=2 && n<=4) ? 1 : 2;\n"

msgid "Simple message"
msgstr "Jednoduchá zpráva"

#. Extracted comment
# Translator comment
#: Location:42 Another:69
#, fuzzy
#| msgctxt "ConTeXt"
#| msgid "Previous message"
msgctxt "ConTeXt"
msgid "Changed message"
msgstr "Změněná\n"
"zpráva"

msgid "Untranslated message"
msgstr ""

# Another comment
#~ msgid "Obsolete message"
#~ msgstr "Zastaralá zpráva"

"###;

    /// Reads `SAMPLE_PO` end to end and checks the header language and
    /// every field of each of the four units.
    #[test]
    fn integration_test() {
        let mut reader = PoReader::new(SAMPLE_PO.as_ref());

        // The header is consumed by the constructor, not returned as a unit.
        assert_eq!(LanguageRange::new("cs").unwrap(), *reader.target_language());

        // Plain translated unit.
        let u1 = reader.next().unwrap().unwrap();
        assert_eq!(None, *u1.context());
        assert_eq!(Singular("Simple message".to_owned()), *u1.source());
        assert_eq!(Singular("Jednoduchá zpráva".to_owned()), *u1.target());
        assert_eq!(None, *u1.prev_context());
        assert_eq!(Empty, *u1.prev_source());
        assert!(u1.notes().is_empty());
        assert!(u1.locations().is_empty());
        assert_eq!(::State::Final, u1.state());
        assert!(u1.is_translated());
        assert!(!u1.is_obsolete());

        // Fuzzy unit with comments, locations, context, previous values and
        // a target continued on a second line.
        let u2 = reader.next().unwrap().unwrap();
        assert_eq!(Some("ConTeXt".to_owned()), *u2.context());
        assert_eq!(Singular("Changed message".to_owned()), *u2.source());
        assert_eq!(Singular("Změněná\nzpráva".to_owned()), *u2.target());
        assert_eq!(Some("ConTeXt".to_owned()), *u2.prev_context());
        assert_eq!(Singular("Previous message".to_owned()), *u2.prev_source());
        assert_eq!(&[
            (Developer, "Extracted comment".to_owned()),
            (Translator, "Translator comment".to_owned()),
        ], u2.notes().as_slice());
        assert_eq!(&[
            "Location:42".to_owned(),
            "Another:69".to_owned(),
        ], u2.locations().as_slice());
        assert_eq!(::State::NeedsWork, u2.state());
        assert!(!u2.is_translated());
        assert!(!u2.is_obsolete());

        // Untranslated unit: empty target, state stays Empty.
        let u3 = reader.next().unwrap().unwrap();
        assert_eq!(None, *u3.context());
        assert_eq!(Singular("Untranslated message".to_owned()), *u3.source());
        assert_eq!(Singular("".to_owned()), *u3.target());
        assert_eq!(None, *u3.prev_context());
        assert_eq!(Empty, *u3.prev_source());
        assert!(u3.notes().is_empty());
        assert!(u3.locations().is_empty());
        assert_eq!(::State::Empty, u3.state());
        assert!(!u3.is_translated());
        assert!(!u3.is_obsolete());

        // Obsolete (`#~`) unit preceded by a translator comment.
        let u4 = reader.next().unwrap().unwrap();
        assert_eq!(None, *u4.context());
        assert_eq!(Singular("Obsolete message".to_owned()), *u4.source());
        assert_eq!(Singular("Zastaralá zpráva".to_owned()), *u4.target());
        assert_eq!(None, *u4.prev_context());
        assert_eq!(Empty, *u4.prev_source());
        assert_eq!(&[
            (Translator, "Another comment".to_owned()),
        ], u4.notes().as_slice());
        assert!(u4.locations().is_empty());
        assert_eq!(::State::Final, u4.state());
        assert!(u4.is_translated());
        assert!(u4.is_obsolete());

        // Nothing is left after the last unit.
        assert!(reader.next().is_none());
    }
}