crowbook_localize/
extractor.rs

1// This Source Code Form is subject to the terms of the Mozilla Public
2// License, v. 2.0. If a copy of the MPL was not distributed with
3// this file, You can obtain one at https://mozilla.org/MPL/2.0/.
4
5use message::Message;
6use error::{Error, Result};
7use common::{find_string, escape_string};
8
9use std::collections::HashMap;
10use std::path::Path;
11use std::fs::File;
12use std::io::Read;
13use std::io::Write;
14
15use regex::Regex;
16use walkdir::WalkDir;
17
18/// Struct that extracts all messages from source code and can print them
19/// to a `.pot` file.
20///
21/// This file can then be used as a starting point to begin translation.
22/// It should be relatively similar to `gettext` generated files.
23///
24/// # Example
25///
26/// ```
27/// use crowbook_intl::Extractor;
28/// let mut extractor = Extractor::new();
29/// extractor.add_messages_from_dir("src/").unwrap();
30/// println!("{}", extractor.generate_pot_file());
31/// ```
32///
33/// # Note
34///
35/// This struct only add messages that are considered as needing localization,
36/// that is, the first argument of calls so `lformat!` macro.
37pub struct Extractor {
38    messages: HashMap<String, Message>,
39    // Matches the format string (as used by `lformat!` and the actual escaped string
40    // given to potfile
41    orig_strings: HashMap<String, String>, 
42}
43
44impl Extractor {
45    /// Create a new, empty extractor
46    pub fn new() -> Extractor {
47        Extractor {
48            messages: HashMap::new(),
49            orig_strings: HashMap::new(), 
50        }
51    }
52
53    /// Returns a hashmap mapping the original strings (as used by `lformat!`)
54    /// to escaped strings. Only contains strings that are different and
55    /// must thus be handled.
56    pub fn original_strings<'a>(&'a self) -> &'a HashMap<String, String> {
57        &self.orig_strings
58    }
59
60    /// Add all the messages contained in a source file
61    pub fn add_messages_from_file<P: AsRef<Path>>(&mut self, file: P) -> Result<()> {
62        lazy_static! {
63            static ref REMOVE_COMMS: Regex = Regex::new(r#"//[^\n]*"#).unwrap();
64            static ref FIND_MSGS: Regex = Regex::new(r#"lformat!\("#).unwrap();
65        }
66        
67        let filename =  format!("{}", file.as_ref().display());
68        let mut f = try!(File::open(file)
69                         .map_err(|e| Error::parse(format!("could not open file {}: {}",
70                                                           &filename,
71                                                           e))));
72        let mut content = String::new();
73        try!(f.read_to_string(&mut content)
74            .map_err(|e| Error::parse(format!("could not read file {}: {}",
75                                              &filename,
76                                              e))));
77        content = REMOVE_COMMS.replace_all(&content, "");
78
79        for caps in FIND_MSGS.captures_iter(&content) {
80            let (_, pos) = caps.pos(0).unwrap();
81            let line = 1 + &content[..pos].bytes().filter(|b| b == &b'\n').count();
82            
83            let bytes = content[pos..].as_bytes();
84            let orig_msg: String = try!(find_string(bytes)
85                                   .map_err(|_| Error::parse(format!("{}:{}: could not parse as string",
86                                                                     &filename,
87                                                                     line))));
88            let msg = escape_string(orig_msg.as_str()).into_owned();
89            if msg != orig_msg {
90                self.orig_strings.insert(orig_msg, msg.clone());
91            }
92            
93            if self.messages.contains_key(msg.as_str()) {
94                self.messages.get_mut(&msg).unwrap().add_source(filename.as_str(), line);
95            } else {
96                let mut message = Message::new(msg.as_str());
97                message.add_source(filename.as_str(), line);
98                self.messages.insert(msg, message);
99            }
100        }
101
102        Ok(())
103    }
104
105    /// Add messages from all `.rs` files contained in a directory
106    /// (walks through subdirectories)
107    pub fn add_messages_from_dir<P: AsRef<Path>>(&mut self, dir: P) -> Result<()> {
108        let filtered =  WalkDir::new(dir)
109            .into_iter()
110            .filter_map(|e| e.ok())
111            .map(|e| e.path()
112                 .to_string_lossy()
113                 .into_owned())
114            .filter(|s| s.ends_with(".rs"));
115        for filename in filtered {
116            try!(self.add_messages_from_file(&filename));
117        }
118
119        Ok(())
120    }
121
122    /// Generate a pot-like file from the strings extracted from all files (if any)
123    pub fn generate_pot_file(&self) -> String {
124        let mut output = String::from(POT_HEADER);
125        let mut values = self.messages
126            .values()
127            .collect::<Vec<_>>();
128        values.sort();
129        for value in values {
130            output.push_str(&format!("{}", value));
131        }
132        output
133    }
134
135    /// Write a pot-like file to specified location
136    pub fn write_pot_file(&mut self, file: &str) -> Result<()> {
137        let mut f = try!(File::create(file).map_err(|e| Error::new(format!("Could not create file {}: {}",
138                                                                              file, e))));
139        let content = self.generate_pot_file();
140        try!(f.write_all(content.as_bytes())
141             .map_err(|e| Error::new(format!("Could not write to file {}: {}",
142                                             file, e))));
143        Ok(())
144    }
145}
146
/// Header placed at the very beginning of the generated pot file.
///
/// It consists of placeholder comment lines (title, copyright, license,
/// author), a `fuzzy` flag, and an empty `msgid`/`msgstr` entry declaring the
/// content type as UTF-8 plain text, followed by a blank line.
const POT_HEADER: &'static str = r#"# SOME DESCRIPTIVE TITLE
# Copyright (C) YEAR THE PACKAGE'S COPYRIGHT HOLDER
# LICENSE
# AUTHOR <EMAIL@ADDRESS>, YEAR.
#
#, fuzzy
msgid ""
msgstr ""
"Content-Type: text/plain; charset=UTF-8\n"

"#;