1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
//! A cache manager for the publicsuffix crate

extern crate publicsuffix;
#[macro_use]
extern crate lazy_static;
extern crate parking_lot;
#[macro_use]
extern crate slog;
#[macro_use]
extern crate slog_scope;
extern crate app_dirs;

#[cfg(test)]
mod tests;

use std::thread;
use std::io::Write;
use std::path::PathBuf;
use std::time::Duration;
use std::fs::{self, File};

use publicsuffix::errors::*;
use publicsuffix::{List, IntoUrl};
use parking_lot::{RwLock, RwLockReadGuard};
use slog::Logger;
use slog_scope::set_global_logger;
use app_dirs::{AppDataType, AppInfo, app_root};

// Process-wide copy of the public suffix list. It starts out empty and is
// overwritten by `Cache::update`; readers obtain it through `get`.
lazy_static! {
    static ref LIST: RwLock<List> = RwLock::new(List::empty());
}

/// Application identity handed to `app_root` when `Cache::path` asks for the
/// user cache directory.
const APP_INFO: AppInfo = AppInfo {
    name: "publicsuffix",
    author: "mozilla",
};

/// The lock guard for the list
///
/// It dereferences into an instance of `publicsuffix::List`.
pub type ListGuard<'a> = RwLockReadGuard<'a, List>;

/// Settings used to keep the shared list up to date.
#[derive(Debug, Clone)]
struct Cache {
    /// URL the list is downloaded from.
    url: String,
    /// Maximum age of the on-disk copy before a new download is attempted;
    /// `init` also uses it as the sleep interval of its background thread.
    freq: Duration,
}

impl Cache {
    fn new(url: String, freq: Duration) -> Cache {
        Cache {
            url: url,
            freq: freq,
        }
    }

    fn path(&self) -> Result<PathBuf> {
        app_root(AppDataType::UserCache, &APP_INFO)
            .chain_err(|| "error accessing the data directory")
            .and_then(|mut file| {
                file.push("list.dat");
                Ok(file)
            })
    }

    fn update(&self) -> Result<()> {
        let mut list = LIST.write();
        *list = self.list()?;
        info!("the list has been updated successfully");
        Ok(())
    }

    fn list(&self) -> Result<List> {
        match self.path() {
            Ok(path) => {
                if path.is_file() {
                    let last_update = path.metadata()?.modified()?;
                    let elapsed = last_update.elapsed()
                        .chain_err(|| "failed to get elapsed time")?;
                    if elapsed > self.freq {
                        self.download_and_save()
                            .or_else(|error| {
                                info!("failed to download file: {}", error);
                                info!("updating the public suffix list from {}", path.to_str().unwrap());
                                List::from_path(path)
                            })
                    } else {
                        info!("updating the public suffix list from {}", path.to_str().unwrap());
                        List::from_path(path)
                            .or_else(|error| {
                                info!("failed to retrieve the list from local cache: {}", error);
                                info!("updating the public suffix list from {}", self.url);
                                self.download_and_save()
                            })
                    }
                } else {
                    self.download_and_save()
                }
            }
            Err(error) => {
                warn!("failed querying cache path: {}", error);
                self.download_and_save()
            }
        }
    }

    fn download_and_save(&self) -> Result<List> {
        info!("updating the public suffix list from {}", self.url);
        let list = List::from_url(&self.url)?;
        if let Err(error) = self.save(&list) {
            warn!("failed to save the list to disk: {}", error);
        }
        Ok(list)
    }

    fn save(&self, list: &List) -> Result<()> {
        let file = self.path()?;
        if list.all().is_empty() {
            fs::remove_file(file)?;
            return Ok(());
        }
        let mut data = String::with_capacity(list.all().len());
        if !list.icann().is_empty() {
            data.push_str("// ===BEGIN ICANN DOMAINS===\n");
            for rule in list.icann() {
                data.push_str(&format!("{}\n", rule));
            }
        }
        if !list.private().is_empty() {
            data.push_str("// ===BEGIN PRIVATE DOMAINS===\n");
            for rule in list.private() {
                data.push_str(&format!("{}\n", rule));
            }
        }
        let mut file = File::create(file)?;
        file.write_all(data.as_bytes())?;
        file.sync_all()?;
        Ok(())
    }
}

/// Initialise the list
///
/// Call from your `main` or `run` function. It loads the list (from the local
/// cache when that is recent enough, otherwise from `url`) and then starts a
/// background thread that refreshes it at a fixed interval.
///
/// `every` is the refresh interval; pass `None` to use the default of one
/// week.
///
/// ## Example
///
/// ```rust,no_run
/// extern crate psl;
/// extern crate publicsuffix;
///
/// use publicsuffix::LIST_URL;
/// use std::time::Duration;
///
/// fn main() {
///     // Update the list every week
///     psl::init(LIST_URL, None).unwrap();
///
///     // Or update every 2 weeks
///     psl::init(LIST_URL, Duration::from_secs(60 * 60 * 24 * 7 * 2)).unwrap();
/// }
/// ```
///
/// ## Errors
///
/// If it fails to fetch the list for the first time it will return an error.
/// An error is also returned when `url` is not a valid URL or when the
/// background thread cannot be started.
///
/// After it successfully fetches the list for the first time it will try to download
/// an update at `every` interval retrying every 5 minutes if it fails.
///
/// If you are using this in a long running server, I highly recommend you set up a logger
/// using `set_logger` so you will know if updates start failing at some point in future.
pub fn init<U, D>(url: U, every: D) -> Result<()>
    where U: IntoUrl,
          D: Into<Option<Duration>>
{
    let url = url.into_url()?.into_string();
    // default to updating the list every week
    let freq = every.into().unwrap_or(Duration::from_secs(60 * 60 * 24 * 7));
    let cache = Cache::new(url, freq);
    // the first load happens on the caller's thread so its failure is reported
    cache.update()?;
    // `Builder::spawn` reports a failure to create the thread as an error,
    // whereas `thread::spawn` would panic inside the caller.
    thread::Builder::new()
        .name("psl-updater".to_string())
        .spawn(move || {
            loop {
                thread::sleep(cache.freq);
                // keep retrying until one update succeeds, then go back to
                // sleeping for the regular interval
                while let Err(error) = cache.update() {
                    warn!("failed to update the list: {}", error);
                    info!("will try again in 5 minutes");
                    thread::sleep(Duration::from_secs(300));
                }
            }
        })
        .chain_err(|| "failed to spawn the list update thread")?;
    Ok(())
}

/// Borrows the shared list
///
/// Takes a read lock on the list and returns the guard. The guard derefs
/// into `publicsuffix::List`, so the list methods can be called on it
/// directly. Until an update has stored a list, the guard refers to the
/// empty list the crate starts with.
///
/// ## Example
///
/// ```rust,no_run
/// # extern crate psl;
/// # extern crate publicsuffix;
/// # fn foo() -> Result<(), ::publicsuffix::errors::Error> {
/// let domain = psl::get().parse_domain("example.com")?;
/// # Ok(())
/// # }
/// # fn main() {}
/// ```
pub fn get<'a>() -> ListGuard<'a> {
    let guard: ListGuard<'a> = LIST.read();
    guard
}

/// Registers a clone of `l` as the `slog_scope` global logger
///
/// NOTE(review): whatever `set_global_logger` returns is discarded here. If
/// the pinned `slog_scope` version returns a guard that resets the logger
/// when dropped, this call has no lasting effect; confirm against that
/// version.
pub fn set_logger(l: &Logger) {
    let logger = l.clone();
    set_global_logger(logger);
}