assemble-core 0.2.0

The core crate of the assemble-rs package
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
use crate::__export::from_str;
use crate::cryptography::{hash_file_sha256, Sha256};
use crate::exception::BuildError;
use crate::file_collection::{FileCollection, FileSet};
use crate::identifier::TaskId;
use crate::lazy_evaluation::anonymous::AnonymousProvider;
use crate::lazy_evaluation::{IntoProvider, Prop, Provider, ProviderExt, VecProp};
use crate::project::buildable::IntoBuildable;
use crate::project::error::ProjectResult;

use crate::provider;
use crate::task::work_handler::output::Output;
use crate::task::work_handler::serializer::Serializable;
use input::Input;

use once_cell::sync::OnceCell;
use serde::de::DeserializeOwned;
use serde::ser::Error as _;
use serde::{Deserialize, Deserializer, Serialize, Serializer};
use std::collections::{HashMap, HashSet};
use std::error::Error;
use std::fs::{create_dir_all, File};
use std::io;
use std::io::Read;
use std::io::Write;
use std::path::{Path, PathBuf};

use std::time::SystemTime;
use time::OffsetDateTime;
use crate::error::PayloadError;

pub mod input;
pub mod output;
pub mod serializer;

/// Tracks the declared inputs and outputs of a single task, together with the
/// cached record of the task's previous execution.
pub struct WorkHandler {
    /// Identifier of the task this handler belongs to.
    task_id: TaskId,
    /// Base directory; the task's cache file lives at
    /// `cache_location.join(task_id.as_path())`.
    cache_location: PathBuf,
    /// Serializable input values registered through the `add_input*` methods.
    inputs: VecProp<Serializable>,
    /// Output files registered so far; `None` until an output is added.
    outputs: Option<FileSet>,
    /// Named pieces of serializable output data, keyed by the id they were added with.
    serialized_output: HashMap<String, AnonymousProvider<Serializable>>,
    /// Input snapshot, computed at most once by `get_input`.
    final_input: OnceCell<Input>,
    /// Output snapshot, computed at most once by `get_output`.
    final_output: OnceCell<Option<Output>>,
    /// Execution history loaded from the cache file, once a load has succeeded.
    execution_history: OnceCell<TaskExecutionHistory>,
    /// Up-to-date flag; may be set exactly once.
    up_to_date_status: OnceCell<bool>,
    /// Whether the task did work; initialised to `true` by `new`.
    did_work: bool,
}

/// The input/output pair recorded for one execution of a task. This is the
/// value `WorkHandler` serializes to, and parses back from, the task's cache file.
#[derive(Debug, Serialize, Deserialize)]
struct TaskExecutionHistory {
    /// Input snapshot of the recorded run.
    input: Input,
    /// Output snapshot of the recorded run.
    output: Output,
}

impl WorkHandler {
    pub fn new(id: &TaskId, cache_loc: PathBuf) -> Self {
        Self {
            task_id: id.clone(),
            cache_location: cache_loc,
            inputs: VecProp::new(id.join("inputs").unwrap()),
            outputs: None,
            serialized_output: Default::default(),
            final_input: OnceCell::new(),
            final_output: OnceCell::new(),
            execution_history: OnceCell::new(),
            up_to_date_status: OnceCell::new(),
            did_work: true,
        }
    }

    /// Returns `true` only when at least one input value is registered and an
    /// output file set has been added.
    pub fn has_inputs_and_outputs(&self) -> bool {
        if self.inputs.get().is_empty() {
            return false;
        }
        self.outputs.is_some()
    }

    /// Removes the execution history file, if it exists.
    ///
    /// The file is removed directly rather than after an `exists()` probe, so
    /// there is no window in which another process can delete it between the
    /// check and the removal. A missing file is not an error.
    ///
    /// # Errors
    ///
    /// Returns any I/O error from the removal other than
    /// [`io::ErrorKind::NotFound`].
    pub fn remove_execution_history(&self) -> io::Result<()> {
        let file_location = self.cache_location.join(self.task_id.as_path());
        match std::fs::remove_file(file_location) {
            // Already gone: the desired end state has been reached.
            Err(e) if e.kind() != io::ErrorKind::NotFound => Err(e),
            _ => Ok(()),
        }
    }

    /// Writes the current input/output pair to this task's cache file.
    ///
    /// Nothing is written (and `Ok(())` is returned) when the input reports no
    /// inputs or when `get_output` yields `None`. Parent directories of the
    /// cache file are created as needed and an existing file is truncated.
    ///
    /// # Errors
    ///
    /// Fails if the input or output cannot be computed, if the directory or
    /// file cannot be created, or if serialization fails.
    pub fn store_execution_history(&self) -> ProjectResult<()> {
        let current_input = self.get_input()?;
        if !current_input.any_inputs() {
            return Ok(());
        }
        let current_output = match self.get_output()? {
            Some(output) => output,
            None => return Ok(()),
        };
        let history = TaskExecutionHistory {
            input: current_input.clone(),
            output: current_output.clone(),
        };

        let file_location = self.cache_location.join(self.task_id.as_path());
        if let Some(parent) = file_location.parent() {
            create_dir_all(parent).map_err(PayloadError::new)?;
        }

        let mut options = File::options();
        options.write(true).truncate(true).create(true);
        let mut file = options.open(file_location).map_err(PayloadError::new)?;

        serializer::to_writer(&mut file, &history)?;
        Ok(())
    }

    /// Serializes `input` into this task's cache file, creating parent
    /// directories as needed and truncating any existing file.
    ///
    /// NOTE(review): this writes a bare `Input` to the same path that
    /// `try_get_execution_history` parses as a `TaskExecutionHistory` —
    /// confirm that is intended.
    ///
    /// # Errors
    ///
    /// Returns an error if the directory or file cannot be created, or if
    /// serialization fails. Serialization failures are reported as
    /// [`io::ErrorKind::Other`] instead of panicking.
    pub fn cache_input(&self, input: Input) -> io::Result<()> {
        let path = self.task_id.as_path();
        let file_location = self.cache_location.join(path);
        if let Some(parent) = file_location.parent() {
            create_dir_all(parent)?;
        }

        let mut file = File::options()
            .write(true)
            .truncate(true)
            .create(true)
            .open(file_location)?;

        // A failed write is a recoverable I/O-level problem for the caller,
        // not an invariant violation, so surface it through the return type.
        serializer::to_writer(&mut file, &input)
            .map_err(|e| io::Error::new(io::ErrorKind::Other, e.to_string()))?;
        Ok(())
    }

    /// Loads the previous execution history from the cache file, memoising a
    /// successful load.
    ///
    /// Returns `None` when the file is missing, unreadable, or cannot be
    /// parsed. A failed attempt is not memoised, so a later call tries again.
    /// Read failures (for example non-UTF-8 content) are treated like any other
    /// load failure instead of panicking.
    fn try_get_execution_history(&self) -> Option<&TaskExecutionHistory> {
        self.execution_history
            .get_or_try_init(|| -> Result<TaskExecutionHistory, Box<dyn Error>> {
                let path = self.task_id.as_path();
                let file_location = self.cache_location.join(path);
                if file_location.exists() {
                    let mut read = File::open(&file_location)?;
                    let mut buffer = String::new();
                    read.read_to_string(&mut buffer)?;
                    Ok(from_str(&buffer)?)
                } else {
                    Err(Box::new(BuildError::new("no file found for cache")))
                }
            })
            .ok()
    }

    /// Returns the input recorded by the previous run, if a history could be loaded.
    pub fn try_get_prev_input(&self) -> Option<&Input> {
        let history = self.try_get_execution_history()?;
        Some(&history.input)
    }

    /// Registers a serializable value as an input of the task.
    ///
    /// A property named `id` is created on the task id, bound to `value`
    /// mapped through `Serializable::new`, and appended to the input list.
    ///
    /// # Errors
    ///
    /// Fails if the property cannot be created or cannot be bound.
    pub fn add_input<T: Serialize + Send + Sync + Clone + 'static, P: IntoProvider<T>>(
        &mut self,
        id: &str,
        value: P,
    ) -> ProjectResult
    where
        <P as IntoProvider<T>>::Provider: 'static,
    {
        let mut input_prop: Prop<Serializable> =
            self.task_id.prop(id).map_err(PayloadError::new)?;
        let serialized = value
            .into_provider()
            .flat_map(|raw| Serializable::new(raw));
        input_prop
            .set_with(serialized)
            .map_err(PayloadError::new)?;
        self.inputs.push_with(input_prop);
        Ok(())
    }

    /// Registers a single file path as an input of the task.
    ///
    /// The provided path is wrapped in an [`InputFile`] before being
    /// serialized, and the resulting property (named `id`) is appended to the
    /// input list.
    ///
    /// # Errors
    ///
    /// Fails if the property cannot be created or cannot be bound.
    pub fn add_input_file<Pa: AsRef<Path> + 'static, P: IntoProvider<Pa>>(
        &mut self,
        id: &str,
        value: P,
    ) -> ProjectResult
    where
        Pa: Send + Sync + Clone,
        <P as IntoProvider<Pa>>::Provider: 'static + Clone,
    {
        let mut input_prop: Prop<Serializable> =
            self.task_id.prop(id).map_err(PayloadError::new)?;
        let serialized_file = value.into_provider().flat_map(|path| {
            let file = InputFile::new(path.as_ref());
            Serializable::new(file)
        });
        input_prop
            .set_with(serialized_file)
            .map_err(PayloadError::new)?;
        self.inputs.push_with(input_prop);
        Ok(())
    }

    /// Registers a whole file collection as an input of the task.
    ///
    /// The provided collection is wrapped in [`InputFiles`] before being
    /// serialized, and the resulting property (named `id`) is appended to the
    /// input list.
    ///
    /// # Errors
    ///
    /// Fails if the property cannot be created or cannot be bound.
    pub fn add_input_files<Pa, P: IntoProvider<Pa>>(&mut self, id: &str, value: P) -> ProjectResult
    where
        Pa: FileCollection,
        Pa: Send + Sync + Clone + 'static,
        <P as IntoProvider<Pa>>::Provider: 'static + Clone,
    {
        let mut input_prop: Prop<Serializable> =
            self.task_id.prop(id).map_err(PayloadError::new)?;
        let serialized_files = value.into_provider().flat_map(|collection: Pa| {
            let files = InputFiles::new(collection);
            Serializable::new(files)
        });
        input_prop
            .set_with(serialized_files)
            .map_err(PayloadError::new)?;
        self.inputs.push_with(input_prop);
        Ok(())
    }

    /// Registers an existing provider-like value as an input of the task.
    ///
    /// `prop` is cloned, converted into a provider, mapped through
    /// `Serializable::new`, wrapped in an [`AnonymousProvider`] and appended
    /// to the input list. This implementation always returns `Ok(())`.
    pub fn add_input_prop<T: Serialize + Send + Sync + Clone + 'static, P>(
        &mut self,
        prop: &P,
    ) -> ProjectResult
    where
        P: IntoProvider<T> + Clone,
        <P as IntoProvider<T>>::Provider: 'static,
    {
        let source = prop.clone().into_provider();
        let serialized = source.flat_map(Serializable::new);
        self.inputs.push_with(AnonymousProvider::new(serialized));
        Ok(())
    }

    /// Returns the input snapshot for this task, computing it on first use.
    ///
    /// A successfully built snapshot is memoised, so later calls return the
    /// same value.
    ///
    /// # Errors
    ///
    /// Fails if the registered input values cannot be resolved.
    pub fn get_input(&self) -> ProjectResult<&Input> {
        let build_snapshot = || -> ProjectResult<Input> {
            let values = self.inputs.fallible_get().map_err(PayloadError::new)?;
            Ok(Input::new(&self.task_id, values))
        };
        self.final_input.get_or_try_init(build_snapshot)
    }

    /// Adds the files of `fc` to this task's outputs, creating the output set
    /// on first use. Outputs can be added until
    /// [`get_output`](WorkHandler::get_output) is called.
    pub fn add_output<F: FileCollection>(&mut self, fc: F) {
        let current = self.outputs.get_or_insert_with(FileSet::new);
        *current += FileSet::from_iter(fc.files());
    }

    /// Adds a provider of a file collection to this task's outputs, creating
    /// the output set on first use. Outputs can be added until
    /// [`get_output`](WorkHandler::get_output) is called.
    pub fn add_output_provider<P, F>(&mut self, fc_provider: P)
    where
        P: Provider<F> + 'static,
        F: FileCollection + Send + Sync + Clone + 'static,
    {
        let current = self.outputs.get_or_insert_with(FileSet::new);
        *current += FileSet::with_provider(fc_provider);
    }

    /// Registers a named piece of serializable output data, replacing any
    /// entry previously stored under `id`.
    ///
    /// A value that fails to serialize is mapped to `None` rather than to an
    /// error (the result of `Serializable::new` is passed through `.ok()`).
    pub fn add_serialized_data<P, T: Serialize + DeserializeOwned + 'static + Send + Sync + Clone>(
        &mut self,
        id: &str,
        value: P,
    ) where
        P: IntoProvider<T>,
        P::Provider: 'static,
    {
        let source = value.into_provider();
        let serialized = source.flat_map(|data| Serializable::new(data).ok());
        let entry = AnonymousProvider::new(serialized);

        self.serialized_output.insert(id.to_string(), entry);
    }

    /// Registers a serialized unit value `()` under `id`, replacing any entry
    /// previously stored under that id.
    pub fn add_empty_serialized_data(&mut self, id: &str) {
        let unit_entry = AnonymousProvider::new(provider!(|| Serializable::new(()).unwrap()));
        self.serialized_output.insert(id.to_string(), unit_entry);
    }

    /// Returns the output snapshot for this task, computing it on first use.
    ///
    /// All registered serialized data is resolved first. Then:
    /// - if an output file set exists, the snapshot holds that set plus the
    ///   serialized data;
    /// - otherwise, if there is no serialized data, the snapshot is an empty
    ///   file set with an empty data map;
    /// - otherwise the result is `None`.
    ///
    /// NOTE(review): the last two cases look inverted — a task with only
    /// serialized data gets `None`, while a task with no outputs at all gets
    /// `Some`. Behaviour is preserved as-is here; confirm the intent.
    ///
    /// # Errors
    ///
    /// Fails if any piece of serialized data cannot be resolved.
    pub fn get_output(&self) -> ProjectResult<Option<&Output>> {
        let snapshot = self
            .final_output
            .get_or_try_init(|| -> ProjectResult<Option<Output>> {
                let mut serialized = HashMap::new();
                for (key, data) in &self.serialized_output {
                    let value = data.fallible_get().map_err(PayloadError::new)?;
                    serialized.insert(key.clone(), value);
                }

                let output = match &self.outputs {
                    Some(files) => Some(Output::new(files.clone(), serialized)),
                    None if serialized.is_empty() => {
                        Some(Output::new(FileSet::new(), serialized))
                    }
                    None => None,
                };
                Ok(output)
            })?;
        Ok(snapshot.as_ref())
    }

    /// Returns the input and output recorded by the previous run, or `None`
    /// if either is unavailable.
    pub fn prev_work(&self) -> Option<(&Input, &Output)> {
        let prev_input = self.try_get_prev_input();
        let prev_output = self.try_get_prev_output();
        match (prev_input, prev_output) {
            (Some(input), Some(output)) => Some((input, output)),
            _ => None,
        }
    }

    /// Returns the output recorded by the previous run, if a history could be loaded.
    pub fn try_get_prev_output(&self) -> Option<&Output> {
        let history = self.try_get_execution_history()?;
        Some(&history.output)
    }

    /// Returns the current "did work" flag. The flag starts out `true` and
    /// changes only through [`set_did_work`](WorkHandler::set_did_work).
    pub fn did_work(&self) -> bool {
        self.did_work
    }

    /// Overwrites the "did work" flag with `did_work`.
    pub fn set_did_work(&mut self, did_work: bool) {
        self.did_work = did_work;
    }

    /// Records the up-to-date status of the task. The status can be set only once.
    ///
    /// # Panics
    ///
    /// Panics if the status has already been set.
    pub fn set_up_to_date(&mut self, up_to_date_status: bool) {
        self.up_to_date_status
            .set(up_to_date_status)
            .expect("up to date status already set")
    }

    /// Returns the up-to-date status previously stored with
    /// [`set_up_to_date`](WorkHandler::set_up_to_date).
    ///
    /// # Panics
    ///
    /// Panics if the status has not been set yet.
    pub fn up_to_date(&self) -> &bool {
        self.up_to_date_status
            .get()
            .expect("up to date status not set")
    }

    /// Serializes `val` to a string immediately and returns a provider that
    /// hands out clones of the result.
    ///
    /// A serialization failure is discarded via `.ok()`, so the captured value
    /// is `None` in that case.
    /// NOTE(review): presumably `provider!` turns a closure yielding
    /// `Option<String>` into a `Provider<String>` — confirm against the macro.
    fn serialize_data<T: Serialize>(val: T) -> impl Provider<String> {
        let string = serializer::to_string(&val).ok();
        provider!(move || { string.clone() })
    }
}

/// A borrowed `WorkHandler` is buildable through its inputs.
impl IntoBuildable for &WorkHandler {
    type Buildable = VecProp<Serializable>;

    /// Clones the registered inputs and converts the clone into a buildable.
    fn into_buildable(self) -> Self::Buildable {
        // The input property list is itself buildable, so no separate
        // container has to be assembled here.
        self.inputs.clone().into_buildable()
    }
}

/// Wraps a path so that it can be serialized as a task input.
///
/// When serialized, an existing path is written together with its SHA-256
/// hash; a path that does not exist is written as the unit value.
#[derive(Debug)]
pub struct InputFile(PathBuf);

impl InputFile {
    pub fn new(path: impl AsRef<Path>) -> Self {
        let path = path.as_ref().to_path_buf();
        Self(path)
    }

    /// Direct implementaiton of serialize
    pub fn serialize<P: AsRef<Path>, S: Serializer>(
        path: P,
        serializer: S,
    ) -> Result<S::Ok, S::Error> {
        Self::new(path).serialize(serializer)
    }

    pub fn deserialize<'de, D: Deserializer<'de>>(deserializer: D) -> Result<PathBuf, D::Error> {
        let data = InputFileData::deserialize(deserializer)?;
        Ok(data.path)
    }
}

/// Serialized form of an [`InputFile`] whose path exists.
#[derive(Serialize, Deserialize)]
struct InputFileData {
    /// The path of the file.
    path: PathBuf,
    /// SHA-256 hash produced by `hash_file_sha256` for `path`.
    data: Sha256,
}

impl Serialize for InputFile {
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: Serializer,
    {
        if self.0.exists() {
            InputFileData {
                path: self.0.clone(),
                data: hash_file_sha256(&self.0).map_err(S::Error::custom)?,
            }
            .serialize(serializer)
        } else {
            ().serialize(serializer)
        }
    }
}

/// Represents change from previous run.
///
/// The default status is [`ChangeStatus::Added`].
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, Hash)]
pub enum ChangeStatus {
    /// Value was deleted.
    Deleted,
    /// Value was modified.
    Modified,
    /// Value was added.
    #[default]
    Added,
    /// Value is the same as before.
    Same,
}

/// Normalizes some system time to UTC time.
///
/// The result is the Unix epoch offset by the distance between `system_time`
/// and the epoch. Times before the epoch are handled by subtracting that
/// distance instead of panicking.
pub fn normalize_system_time(system_time: SystemTime) -> OffsetDateTime {
    match system_time.duration_since(SystemTime::UNIX_EPOCH) {
        Ok(after_epoch) => OffsetDateTime::UNIX_EPOCH + after_epoch,
        // `duration_since` fails when `system_time` precedes the epoch; the
        // error carries how far before the epoch it lies.
        Err(before_epoch) => OffsetDateTime::UNIX_EPOCH - before_epoch.duration(),
    }
}

/// Wraps a [`FileSet`] so that it can be serialized as a task input.
///
/// A non-empty set is serialized through `InputFilesData`; an empty set is
/// serialized as the unit value.
pub struct InputFiles(FileSet);

impl InputFiles {
    /// Collects the files of `fc` into a [`FileSet`] and wraps it.
    fn new<F>(fc: F) -> Self
    where
        F: FileCollection,
    {
        Self(FileSet::from_iter(fc.files()))
    }
}

impl Serialize for InputFiles {
    /// Serializes an empty file set as the unit value; otherwise serializes
    /// an `InputFilesData` built from a clone of the set.
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: Serializer,
    {
        if self.0.files().is_empty() {
            return ().serialize(serializer);
        }
        InputFilesData::new(self.0.clone()).serialize(serializer)
    }
}

/// Serialized form of a non-empty [`InputFiles`].
#[derive(Debug, Serialize)]
struct InputFilesData {
    /// Every path contained in the file set.
    all_files: HashSet<PathBuf>,
    /// One [`InputFile`] per path, keyed by that path.
    data: HashMap<PathBuf, InputFile>,
}

impl InputFilesData {
    /// Builds the serializable record for `set`: the set of all its paths,
    /// plus an [`InputFile`] for each of those paths.
    pub fn new(set: FileSet) -> Self {
        let all_files = set.files();
        let data = all_files
            .iter()
            .map(|path| (path.clone(), InputFile::new(path)))
            .collect();
        Self { all_files, data }
    }
}