1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
use super::{Checkpointer, CheckpointerError};
use crate::module::State;
use burn_tensor::Element;

/// Checkpointer that stores each epoch's state in its own file on disk.
///
/// The type parameter `P` is the element type the state is converted to
/// before being written, and the type it is loaded as before being converted
/// back to the caller's element type.
pub struct FileCheckpointer<P> {
    /// Directory under which checkpoint files are placed.
    directory: String,
    /// Prefix of every checkpoint file name; the epoch number is appended.
    name: String,
    /// Retention distance: after saving epoch `n`, the file for epoch
    /// `n - num_keep` is removed if it exists.
    num_keep: usize,
    /// A value of type `P`, filled with `P::default()` by the constructor.
    /// It is not read by any code visible in this file; presumably it only
    /// ties the type parameter to the struct.
    _precision: P,
}

impl<P: Element> FileCheckpointer<P> {
    pub fn new(directory: &str, name: &str, num_keep: usize) -> Self {
        std::fs::create_dir_all(directory).ok();

        Self {
            directory: directory.to_string(),
            name: name.to_string(),
            num_keep,
            _precision: P::default(),
        }
    }
    fn path_for_epoch(&self, epoch: usize) -> String {
        format!("{}/{}-{}.json.gz", self.directory, self.name, epoch)
    }
}

impl<E, P> Checkpointer<E> for FileCheckpointer<P>
where
    P: serde::Serialize + serde::de::DeserializeOwned + Element,
    E: Element,
{
    /// Writes `state` to the file for `epoch`, then drops the checkpoint that
    /// is `num_keep` epochs older, if such a file is present.
    ///
    /// # Errors
    ///
    /// Returns `CheckpointerError::IOError` when writing the new checkpoint
    /// or removing the old one fails.
    fn save(&self, epoch: usize, state: State<E>) -> Result<(), CheckpointerError> {
        let target = self.path_for_epoch(epoch);
        log::info!("Saving checkpoint {} to {}", epoch, target);

        // The state is stored using the checkpointer's own element type `P`.
        let converted = state.convert::<P>();
        converted
            .save(&target)
            .map_err(CheckpointerError::IOError)?;

        // `checked_sub` yields `None` exactly when `num_keep > epoch`, i.e.
        // when there is no epoch old enough to clean up yet.
        if let Some(stale_epoch) = epoch.checked_sub(self.num_keep) {
            let stale_path = self.path_for_epoch(stale_epoch);

            if std::path::Path::new(&stale_path).exists() {
                log::info!("Removing checkpoint {}", stale_path);
                std::fs::remove_file(stale_path).map_err(CheckpointerError::IOError)?;
            }
        }

        Ok(())
    }

    /// Loads the checkpoint file for `epoch` and converts it to element
    /// type `E`.
    ///
    /// # Errors
    ///
    /// Returns `CheckpointerError::StateError` when the file cannot be loaded.
    fn restore(&self, epoch: usize) -> Result<State<E>, CheckpointerError> {
        let source = self.path_for_epoch(epoch);
        log::info!("Restoring checkpoint {} from {}", epoch, source);

        match State::<P>::load(&source) {
            Ok(loaded) => Ok(loaded.convert::<E>()),
            Err(err) => Err(CheckpointerError::StateError(err)),
        }
    }
}