1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
use std::collections::hash_map::HashMap;
use std::env::temp_dir;
use std::fs;
use std::hash::Hash;
use std::io::Result;
use std::path::PathBuf;
use uuid::Uuid;

use tempdir::TempDir;

/// Creates a directory structure suitable for storing large numbers of files.
/// Optionally deletes the created directory and files when dropped.
///
/// Slots for new files are allocated using `get_new_file()`. This struct will
/// create new subdirectories as needed to ensure that no subdirectory contains
/// more than 1,000 files/subdirectories.
///
/// Exactly one of `tmp_dir` / `persistent_dir` is populated by every
/// constructor in this file; `get_root()` relies on that.
pub struct FileTree {
    // Set when the tree was built with `persistent == false`; holds the
    // `tempdir::TempDir` guard that backs the tree.
    tmp_dir: Option<TempDir>,
    // Set when the tree is persistent (or was built via `from_existing`);
    // this path is used as the root as-is.
    persistent_dir: Option<PathBuf>,
    // Sequence number of the next slot to hand out. It is formatted as a
    // zero-padded 12-digit file name and bumped on every allocation attempt.
    counter: u64
}

impl FileTree {
    /// Build a `FileTree` located at (or inside) `path`.
    ///
    /// When `persistent` is `true`, `path` itself is used as the root and this
    /// call does not touch the file system; directories are only created later
    /// by `get_new_file()`. When `persistent` is `false`, a temporary directory
    /// is created inside `path` via `tempdir::TempDir`, and that directory and
    /// all its contents will be deleted when the returned `FileTree` is
    /// dropped.
    ///
    /// # Examples
    ///
    /// Create a new temporary data structure and make sure the base path exists
    ///
    /// ```
    /// use file_tree::FileTree;
    /// use std::env::temp_dir;
    ///
    /// let file_tree = FileTree::new_in(temp_dir(), false).unwrap();
    /// assert!(file_tree.get_root().exists());
    /// ```
    ///
    /// # Errors
    ///
    /// If `persistent` is `false`, the directory will be created using
    /// `tempdir::TempDir`, and any related errors will be returned here
    pub fn new_in(path: PathBuf, persistent: bool) -> Result<FileTree> {
        // Exactly one of the two locations is populated, depending on the mode.
        let (tmp_dir, persistent_dir) = if persistent {
            (None, Some(path))
        } else {
            (Some(TempDir::new_in(path, "file_tree")?), None)
        };

        Ok(FileTree { tmp_dir,
                      persistent_dir,
                      counter: 0 })
    }

    /// Build a `FileTree` under the system temporary directory.
    ///
    /// When `persistent` is `true`, the root is a UUID-named path below
    /// `std::env::temp_dir()`; it is not created by this call. When
    /// `persistent` is `false`, the root is a `tempdir::TempDir`, and the
    /// directory and all its contents will be deleted when the returned
    /// `FileTree` is dropped.
    ///
    /// # Examples
    ///
    /// Create a new temporary data structure and make sure the base path exists
    ///
    /// ```
    /// use file_tree::FileTree;
    ///
    /// let file_tree = FileTree::new(false).unwrap();
    /// assert!(file_tree.get_root().exists());
    /// ```
    ///
    /// # Errors
    ///
    /// If `persistent` is `false`, the directory will be created using
    /// `tempdir::TempDir`, and any related errors will be returned here
    pub fn new(persistent: bool) -> Result<FileTree> {
        if !persistent {
            let scratch = TempDir::new("file_tree")?;
            return Ok(FileTree { tmp_dir: Some(scratch),
                                 persistent_dir: None,
                                 counter: 0 });
        }

        // Persistent trees get a unique, UUID-named root so that separate
        // instances do not share a directory.
        let dir_name = Uuid::new_v4().hyphenated().to_string();
        let root = temp_dir().join(dir_name);

        Ok(FileTree { tmp_dir: None,
                      persistent_dir: Some(root),
                      counter: 0 })
    }

    /// Re-open a directory structure that already exists on disk. `path`
    /// should be equivalent to the result of calling `get_root()` on the
    /// previous (persistent) `FileTree`.
    ///
    /// The slot counter starts again from zero; `get_new_file()` skips over
    /// any slot whose file already exists.
    ///
    /// # Examples
    ///
    /// Re-create a `FileTree` using an existing file structure
    ///
    /// ```
    /// use file_tree::FileTree;
    /// use std::fs::File;
    ///
    /// // create a `FileTree` with one file
    /// let mut ft = FileTree::new(true).unwrap();
    /// let file_path = ft.get_new_file().unwrap();
    /// File::create(file_path.clone()).unwrap();
    /// let base = ft.get_root();
    /// drop(ft);
    ///
    /// // create a `FileTree` using the existing path, and make sure that the
    /// // files we pull back don't overwrite the existing one
    /// let mut ft2 = FileTree::from_existing(base);
    /// let file2 = ft2.get_new_file().unwrap();
    /// assert_eq!(file_path.file_name().unwrap(), "000000000000");
    /// assert_eq!(file2.file_name().unwrap(), "000000000001");
    /// ```
    pub fn from_existing(path: PathBuf) -> FileTree {
        let root = Some(path);
        FileTree { tmp_dir: None,
                   persistent_dir: root,
                   counter: 0 }
    }

    /// Hand out a `PathBuf` for the next free slot in the file tree. The file
    /// itself is not created by this call, but its parent directory is created
    /// if necessary.
    ///
    /// The returned path is guaranteed not to exist at the time of the call:
    /// candidate slots whose file is already present (for example when this
    /// struct was created from an existing directory structure) are skipped.
    ///
    /// File paths are generated such that each new leaf directory (starting
    /// with `000/000/000/`) will be filled entirely before creating a new
    /// directory (next would be `000/000/001/`).
    ///
    /// # Examples
    ///
    /// Retrieve two distinct file paths via `get_new_file()`
    ///
    /// ```
    /// use file_tree::FileTree;
    ///
    /// let mut file_tree = FileTree::new(false).unwrap();
    ///
    /// let writeable_path = file_tree.get_new_file().unwrap();
    /// assert_eq!(
    ///     writeable_path,
    ///     file_tree.get_root().join("000/000/000/000000000000")
    /// );
    ///
    /// let writeable_path_2 = file_tree.get_new_file().unwrap();
    /// assert_eq!(
    ///     writeable_path_2,
    ///     file_tree.get_root().join("000/000/000/000000000001")
    /// );
    /// ```
    ///
    /// # Errors
    ///
    /// If a new subdirectory is required, `fs::create_dir_all` will be called.
    /// Any errors from that call will be returned here
    pub fn get_new_file(&mut self) -> Result<PathBuf> {
        loop {
            let candidate = self.get_new_file_uniq()?;
            // Skip slots that are already occupied on disk.
            if !candidate.exists() {
                return Ok(candidate);
            }
        }
    }

    /// Derive the path for the current counter value, advance the counter and
    /// make sure the containing leaf directory exists. Does not check whether
    /// the file itself is already present.
    fn get_new_file_uniq(&mut self) -> Result<PathBuf> {
        let file_name = format!("{:012}", self.counter);
        self.counter += 1;

        // `file_name` consists solely of ASCII digits and is at least twelve
        // characters wide, so slicing on byte offsets 3/6/9 is safe. The first
        // three 3-digit groups name the nested directories.
        let relative_dir = format!("{0}/{1}/{2}",
                                   &file_name[0..3],
                                   &file_name[3..6],
                                   &file_name[6..9]);
        let leaf_dir = self.get_root().join(relative_dir);

        fs::create_dir_all(&leaf_dir)?;
        Ok(leaf_dir.join(file_name))
    }

    /// Return the root path for the file tree
    pub fn get_root(&self) -> PathBuf {
        if let Some(ref scratch) = self.tmp_dir {
            return scratch.path().to_path_buf();
        }

        // Every constructor sets `persistent_dir` whenever `tmp_dir` is `None`.
        self.persistent_dir.as_ref().unwrap().to_path_buf()
    }
}

/// Retrieves paths from a `FileTree` using a hashable key.
///
/// File paths are stored in memory, and associated with a key. When requesting
/// paths from a `KeyedFileTree`, an existing path will be returned if the key
/// has been seen before. Otherwise a new path will be created in the directory
/// structure and returned.
///
/// # Examples
///
/// ```
/// extern crate file_tree;
///
/// use file_tree::KeyedFileTree;
///
/// let mut file_tree = KeyedFileTree::new(false).unwrap();
///
/// let writeable_path_1 = file_tree.get(String::from("key1")).unwrap();
/// let writeable_path_2 = file_tree.get(String::from("key2")).unwrap();
///
/// assert_ne!(writeable_path_1, writeable_path_2);
/// ```
pub struct KeyedFileTree<T: Hash + Eq> {
    /// Paths handed out so far, indexed by the key they were requested for.
    paths: HashMap<T, PathBuf>,
    /// Underlying tree that allocates slots for keys not seen before.
    file_tree: FileTree,
}

impl<T> KeyedFileTree<T>
    where T: Hash + Eq
{
    /// Create a new instance. If `persistence` is `false`, the backing
    /// directory structure will be removed when the returned instance is
    /// dropped.
    pub fn new(persistent: bool) -> Result<KeyedFileTree<T>> {
        Ok(KeyedFileTree { paths: HashMap::new(),
                           file_tree: FileTree::new(persistent)? })
    }

    /// Create a new instance, storing the directory structure in `path`. If
    /// `persistence` is `false`, the backing directory structure will be
    /// removed when the returned instance is dropped.
    pub fn new_in(path: PathBuf, persistent: bool) -> Result<KeyedFileTree<T>> {
        Ok(KeyedFileTree { paths: HashMap::new(),
                           file_tree: FileTree::new_in(path, persistent)? })
    }

    /// Creates a new instance from an existing directory structure. `path`
    /// should be equivalent to the result of calling `get_root()` on the
    /// previous (persistent) `KeyedFileTree`, and `existing_files` should be
    /// equivalent to calling `get_existing_files()`.
    pub fn from_existing(path: PathBuf, existing_files: HashMap<T, PathBuf>) -> KeyedFileTree<T> {
        KeyedFileTree { paths: existing_files,
                        file_tree: FileTree::from_existing(path) }
    }

    /// Reserve a spot in the directory structure for `key`, and return the
    /// associated `PathBuf`. If `key` has already been seen, the existing
    /// `PathBuf` will be returned.
    pub fn get(&mut self, key: T) -> Result<PathBuf> {
        Ok(self.paths.entry(key)
               .or_insert(self.file_tree.get_new_file()?)
               .clone())
    }

    /// Return the root path for the file tree.
    pub fn get_root(&self) -> PathBuf { self.file_tree.get_root() }

    /// Gets the map of keys to `PathBuf`s. Useful for re-creating an instance
    /// later with `from_existing()`.
    pub fn get_existing_files(self) -> HashMap<T, PathBuf> { self.paths }
}