1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
use super::{Checkpointer, CheckpointerError};
use crate::module::State;
use burn_tensor::Element;
/// Checkpointer that stores one file per epoch inside a directory.
///
/// Checkpoint files are addressed as `<directory>/<name>-<epoch>.json.gz`.
/// The type parameter `P` is the element type a state is converted to before
/// it is saved, and the element type it is loaded as before being converted
/// back to the caller's element type.
pub struct FileCheckpointer<P> {
/// Directory part of every checkpoint path.
directory: String,
/// File-name prefix placed before the `-<epoch>` suffix.
name: String,
/// Epoch distance used for pruning: saving epoch `e` removes the file of
/// epoch `e - num_keep` when that file exists.
num_keep: usize,
/// A value of `P` obtained from `P::default()`; the methods in this file
/// only use the type `P`, never this value.
_precision: P,
}
impl<P: Element> FileCheckpointer<P> {
    /// Builds a checkpointer whose files live under `directory` and share the
    /// file-name prefix `name`.
    ///
    /// `num_keep` is stored as given and later drives pruning of older
    /// checkpoints when saving.
    ///
    /// The directory (including missing parents) is created eagerly. This is
    /// best-effort: any error reported by `create_dir_all` is discarded here
    /// and the constructor still returns a value.
    pub fn new(directory: &str, name: &str, num_keep: usize) -> Self {
        // Intentionally ignore the outcome; construction never fails.
        let _ = std::fs::create_dir_all(directory);

        let directory = String::from(directory);
        let name = String::from(name);
        let _precision = P::default();

        Self {
            directory,
            name,
            num_keep,
            _precision,
        }
    }

    /// Returns the checkpoint path for `epoch`:
    /// `<directory>/<name>-<epoch>.json.gz`.
    fn path_for_epoch(&self, epoch: usize) -> String {
        let Self {
            directory, name, ..
        } = self;

        format!("{}/{}-{}.json.gz", directory, name, epoch)
    }
}
impl<E, P> Checkpointer<E> for FileCheckpointer<P>
where
    P: serde::Serialize + serde::de::DeserializeOwned + Element,
    E: Element,
{
    /// Writes `state` as the checkpoint for `epoch`, then prunes the
    /// checkpoint that is `num_keep` epochs older.
    ///
    /// The state is converted to element type `P` before being written to
    /// `<directory>/<name>-<epoch>.json.gz`.
    ///
    /// Pruning rules:
    /// - When `num_keep > epoch` there is no older epoch to prune and the
    ///   call returns right after saving.
    /// - Otherwise the file for epoch `epoch - num_keep` is removed. A missing
    ///   file is not an error.
    /// - With `num_keep == 0` the pruned path is the path that was just
    ///   written, so the new checkpoint is removed immediately.
    ///
    /// # Errors
    ///
    /// Returns `CheckpointerError::IOError` when writing the new checkpoint
    /// fails, or when removing the old checkpoint fails for any reason other
    /// than the file not existing.
    fn save(&self, epoch: usize, state: State<E>) -> Result<(), CheckpointerError> {
        let file_path = self.path_for_epoch(epoch);
        log::info!("Saving checkpoint {} to {}", epoch, file_path);

        state
            .convert::<P>()
            .save(&file_path)
            .map_err(CheckpointerError::IOError)?;

        // `checked_sub` yields `None` exactly when `num_keep > epoch`.
        let stale_epoch = match epoch.checked_sub(self.num_keep) {
            Some(stale_epoch) => stale_epoch,
            None => return Ok(()),
        };
        let file_path_old_checkpoint = self.path_for_epoch(stale_epoch);

        // Remove directly instead of probing with `Path::exists` first: the
        // probe is racy (the file can vanish between check and removal) and
        // reports `false` on metadata errors, hiding real I/O failures.
        match std::fs::remove_file(&file_path_old_checkpoint) {
            Ok(()) => {
                log::info!("Removing checkpoint {}", file_path_old_checkpoint);
                Ok(())
            }
            // Nothing to prune for that epoch.
            Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok(()),
            Err(err) => Err(CheckpointerError::IOError(err)),
        }
    }

    /// Loads the checkpoint stored for `epoch` and converts it to element
    /// type `E`.
    ///
    /// The file at `<directory>/<name>-<epoch>.json.gz` is loaded as a
    /// `State<P>` and then converted.
    ///
    /// # Errors
    ///
    /// Returns `CheckpointerError::StateError` when loading the state fails.
    fn restore(&self, epoch: usize) -> Result<State<E>, CheckpointerError> {
        let file_path = self.path_for_epoch(epoch);
        log::info!("Restoring checkpoint {} from {}", epoch, file_path);

        let state = State::<P>::load(&file_path).map_err(CheckpointerError::StateError)?;

        Ok(state.convert())
    }
}