1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
//
// imag - the personal information management suite for the commandline
// Copyright (C) 2015-2019 Matthias Beyer <mail@beyermatthias.de> and contributors
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; version
// 2.1 of the License.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
//

/// External linking is a complex implementation to be able to serve a clean and easy-to-use
/// interface.
///
/// Internally, there are no such things as "external links" (plural). Each Entry in the store can
/// only have _one_ external link.
///
/// This library therefore does the following: It allows you to have several external links with
/// one entry, which are internally one file in the store for each link, linked with "internal
/// linking".
///
/// This helps us greatly with deduplication of URLs.
///

use std::ops::DerefMut;
use std::collections::BTreeMap;
use std::fmt::Debug;

use libimagstore::store::Entry;
use libimagstore::store::Store;
use libimagstore::storeid::StoreId;
use libimagstore::storeid::IntoStoreId;
use libimagutil::debug_result::*;
use libimagerror::errors::ErrorMsg as EM;

use toml_query::read::TomlValueReadExt;
use toml_query::read::TomlValueReadTypeExt;
use toml_query::insert::TomlValueInsertExt;
use failure::Error;
use failure::Fallible as Result;
use failure::ResultExt;
use failure::err_msg;

use internal::InternalLinker;
use module_path::ModuleEntryPath;

use self::iter::*;

use toml::Value;
use url::Url;
use sha1::{Sha1, Digest};
use hex;

/// Access to the URL stored in the header of an entry.
///
/// The implementation for `Entry` in this file reads the URL from the TOML header of the entry.
pub trait Link {

    /// Get the URL of a link entry.
    ///
    /// The `Entry` implementation in this file reads the header key
    /// `links.external.content.url`. It returns `Ok(None)` if that key is not present and an
    /// error if the header cannot be read or the stored string cannot be parsed as a URL.
    fn get_link_uri_from_filelockentry(&self) -> Result<Option<Url>>;

    /// Get the URL stored directly under the header key `links.external.url`.
    ///
    /// The `Entry` implementation in this file returns `Ok(None)` if that key is not present and
    /// an error if the header cannot be read or the stored string cannot be parsed as a URL.
    fn get_url(&self) -> Result<Option<Url>>;

}

/// Header-based implementation of `Link`: both accessors read a string from the entry header and
/// parse it as a URL.
impl Link for Entry {

    /// Read and parse the URL stored at `links.external.content.url`.
    ///
    /// Returns `Ok(None)` if the key is absent. A failing header read is reported with the
    /// `EntryHeaderReadError` context, a string that is not a valid URL with "Invalid URI".
    fn get_link_uri_from_filelockentry(&self) -> Result<Option<Url>> {
        let stored = self
            .get_header()
            .read_string("links.external.content.url")
            .map_err(Error::from)
            .context(EM::EntryHeaderReadError)?;

        let raw = match stored {
            Some(raw) => raw,
            None      => return Ok(None),
        };

        debug!("Found url, parsing: {:?}", raw);
        let url = Url::parse(&raw)
            .map_err(Error::from)
            .context(err_msg("Invalid URI"))?;

        Ok(Some(url))
    }

    /// Read and parse the URL stored at `links.external.url`.
    ///
    /// Returns `Ok(None)` if the key is absent. A string that is not a valid URL is reported
    /// with the `EntryHeaderReadError` context.
    fn get_url(&self) -> Result<Option<Url>> {
        let stored = self.get_header().read_string("links.external.url")?;

        if let Some(raw) = stored {
            let url = Url::parse(&raw)
                .map_err(Error::from)
                .context(EM::EntryHeaderReadError)?;
            Ok(Some(url))
        } else {
            Ok(None)
        }
    }

}

/// Management of the external links (URLs) attached to an object.
///
/// Builds on `InternalLinker`: the implementation for `Entry` in this file stores each URL in a
/// separate store entry and attaches it via an internal link.
pub trait ExternalLinker : InternalLinker {

    /// Get the external links from the implementor object
    ///
    /// The returned iterator borrows `store` and yields one `Result<Url>` per link.
    fn get_external_links<'a>(&self, store: &'a Store) -> Result<UrlIter<'a>>;

    /// Set the external links for the implementor object
    ///
    /// Returns a list of `StoreId`s of link entries; see the implementation for which ones.
    fn set_external_links(&mut self, store: &Store, links: Vec<Url>) -> Result<Vec<StoreId>>;

    /// Add an external link to the implementor object
    ///
    /// Returns a list of `StoreId`s of link entries; see the implementation for which ones.
    fn add_external_link(&mut self, store: &Store, link: Url) -> Result<Vec<StoreId>>;

    /// Remove an external link from the implementor object
    ///
    /// Returns a list of `StoreId`s of link entries; see the implementation for which ones.
    fn remove_external_link(&mut self, store: &Store, link: Url) -> Result<Vec<StoreId>>;

}

pub mod iter {
    //! Iterator helpers for external linking stuff
    //!
    //! Also contains helpers to filter iterators for external/internal links
    //!
    //!
    //! # Warning
    //!
    //! This module uses `internal::Link` as link type, so we operate on _store ids_ here.
    //!
    //! Not to be confused with `external::Link`, which is a real `FileLockEntry` under the hood.
    //!

    use libimagutil::debug_result::*;
    use libimagstore::store::Store;

    use internal::Link;
    use internal::iter::LinkIter;
    use failure::Fallible as Result;

    use url::Url;

    /// Filtering adaptor over a `LinkIter`; building block of `OnlyExternalIter` and
    /// `NoExternalIter`
    ///
    /// The boolean flag states which result of `is_external_link_storeid()` (called "pred"
    /// below) an element must produce in order to be yielded:
    ///
    /// ```ignore
    ///     pred | flag | xor | take?
    ///     ---- | ---- | --- | ----
    ///        0 |    0 |   0 |   1
    ///        0 |    1 |   1 |   0
    ///        1 |    0 |   1 |   0
    ///        1 |    1 |   0 |   1
    /// ```
    ///
    /// With the flag set to `true` only external links are yielded, with the flag set to `false`
    /// only the other links are yielded.
    ///
    /// As the table shows, the operator between the two operands is `!(a ^ b)`, which for
    /// booleans is the same as `a == b`.
    pub struct ExternalFilterIter(LinkIter, bool);

    impl Iterator for ExternalFilterIter {
        type Item = Link;

        /// Advance the wrapped iterator until an element matches the flag, or it is exhausted.
        fn next(&mut self) -> Option<Self::Item> {
            use super::is_external_link_storeid;

            let wanted = self.1;

            loop {
                // `?` ends the iteration as soon as the wrapped iterator is exhausted.
                let candidate = self.0.next()?;

                trace!("Check whether is external: {:?}", candidate);
                if wanted == is_external_link_storeid(&candidate) {
                    trace!("Is external id: {:?}", candidate);
                    return Some(candidate);
                }
            }
        }
    }

    /// Helper trait to be implemented on `LinkIter` to select or deselect all external links
    ///
    /// # See also
    ///
    /// Also see `OnlyExternalIter` and `NoExternalIter` and the helper traits/functions
    /// `OnlyInternalLinks`/`only_internal_links()` and `OnlyExternalLinks`/`only_external_links()`.
    pub trait SelectExternal {
        /// Wrap `self` in an `ExternalFilterIter`.
        ///
        /// `b` is handed to the filter as its flag: `true` yields only external links, `false`
        /// yields only links that are not external.
        fn select_external_links(self, b: bool) -> ExternalFilterIter;
    }

    impl SelectExternal for LinkIter {
        fn select_external_links(self, b: bool) -> ExternalFilterIter {
            ExternalFilterIter(self, b)
        }
    }


    /// Iterator over those links of a `LinkIter` which are external links
    pub struct OnlyExternalIter(ExternalFilterIter);

    impl OnlyExternalIter {
        /// Build the iterator from a `LinkIter`, keeping only the external links
        pub fn new(li: LinkIter) -> OnlyExternalIter {
            OnlyExternalIter(li.select_external_links(true))
        }

        /// Turn the iterator over link ids into an iterator over the URLs behind them
        ///
        /// The entries are looked up in `store` while the returned iterator is consumed.
        pub fn urls<'a>(self, store: &'a Store) -> UrlIter<'a> {
            UrlIter(self, store)
        }
    }

    impl Iterator for OnlyExternalIter {
        type Item = Link;

        fn next(&mut self) -> Option<Self::Item> {
            let OnlyExternalIter(ref mut filtered) = *self;
            filtered.next()
        }
    }

    pub struct NoExternalIter(ExternalFilterIter);

    impl NoExternalIter {
        pub fn new(li: LinkIter) -> NoExternalIter {
            NoExternalIter(ExternalFilterIter(li, false))
        }
    }

    impl Iterator for NoExternalIter {
        type Item = Link;

        fn next(&mut self) -> Option<Self::Item> {
            self.0.next()
        }
    }

    pub trait OnlyExternalLinks : Sized {
        fn only_external_links(self) -> OnlyExternalIter ;

        fn no_internal_links(self) -> OnlyExternalIter {
            self.only_external_links()
        }
    }

    impl OnlyExternalLinks for LinkIter {
        fn only_external_links(self) -> OnlyExternalIter {
            OnlyExternalIter::new(self)
        }
    }

    pub trait OnlyInternalLinks : Sized {
        fn only_internal_links(self) -> NoExternalIter;

        fn no_external_links(self) -> NoExternalIter {
            self.only_internal_links()
        }
    }

    impl OnlyInternalLinks for LinkIter {
        fn only_internal_links(self) -> NoExternalIter {
            NoExternalIter::new(self)
        }
    }

    /// Iterator over the URLs behind the external links of an `OnlyExternalIter`
    ///
    /// Each link id is retrieved from the store and the URL is read from the header of the
    /// retrieved entry. Entries without a URL are skipped; errors are yielded as items.
    pub struct UrlIter<'a>(OnlyExternalIter, &'a Store);

    impl<'a> Iterator for UrlIter<'a> {
        type Item = Result<Url>;

        fn next(&mut self) -> Option<Self::Item> {
            use external::Link;

            loop {
                // No more link ids means this iterator is exhausted as well.
                let id = self.0.next()?;
                debug!("Retrieving entry for id: '{:?}'", id);

                let retrieved: Result<_> = self.1
                    .retrieve(id.clone())
                    .map_dbg_err(|_| format!("Retrieving entry for id: '{:?}' failed", id))
                    .map_err(From::from);

                let url = retrieved.and_then(|entry| {
                    debug!("Store::retrieve({:?}) succeeded", id);
                    debug!("getting external link from file now");
                    entry
                        .get_link_uri_from_filelockentry()
                        .map_dbg_str("Error happened while getting link URI from FLE")
                        .map_dbg_err(|e| format!("URL -> Err = {:?}", e))
                });

                match url {
                    // The entry carries no URL: try the next link id.
                    Ok(None)       => {},
                    Ok(Some(link)) => return Some(Ok(link)),
                    Err(e)         => return Some(Err(e)),
                }
            }
        }

    }

}


/// Check whether the local part of the StoreId starts with `links/external`
///
/// The id is logged on debug level before the check.
pub fn is_external_link_storeid<A: AsRef<StoreId> + Debug>(id: A) -> bool {
    debug!("Checking whether this is a 'links/external/': '{:?}'", id);

    let store_id: &StoreId = id.as_ref();
    store_id.local().starts_with("links/external")
}

/// Implement `ExternalLinker` for `Entry`, hiding the fact that there is no such thing as an external
/// link in an entry, but internal links to other entries which serve as external links, as one
/// entry in the store can only have one external link.
///
/// Each URL is stored in an entry of its own, built from the module path `external/<hash>` where
/// `<hash>` is the hex-encoded SHA1 sum of the URL string. The entry this trait is called on is
/// then internally linked to that entry.
impl ExternalLinker for Entry {

    /// Get the external links from the implementor object
    ///
    /// Takes the internal links of this entry, keeps those which are external link entries and
    /// returns an iterator which loads each of them from `store` and reads the URL from its
    /// header. Nothing is loaded before the returned iterator is consumed.
    ///
    /// # Errors
    ///
    /// Fails if the internal links of this entry cannot be fetched.
    fn get_external_links<'a>(&self, store: &'a Store) -> Result<UrlIter<'a>> {
        let internal_links = self.get_internal_links()?;

        debug!("Getting external links");
        Ok(internal_links.only_external_links().urls(store))
    }

    /// Set the external links for the implementor object
    ///
    /// For each URL the matching link entry is retrieved from the store (which creates it if it
    /// does not exist yet), the URL is written to the header key `links.external.content.url`
    /// of that entry and an internal link from `self` to the entry is added.
    ///
    /// NOTE(review): this function only ever _adds_ internal links. Link entries which are
    /// linked to `self` but whose URL is not part of `links` are left untouched.
    ///
    /// # Return Value
    ///
    /// Returns the StoreIds of the link entries which were newly created by this call, in the
    /// order of `links`. If every link entry existed before, an empty vec![] is returned.
    ///
    /// # Errors
    ///
    /// Processing stops at the first URL for which building the StoreId, accessing the store,
    /// reading or writing the header, or adding the internal link fails. Links processed before
    /// the failing one stay set.
    fn set_external_links(&mut self, store: &Store, links: Vec<Url>) -> Result<Vec<StoreId>> {
        debug!("Iterating {} links = {:?}", links.len(), links);

        let mut created = Vec::new();

        for link in links {
            let hash = hex::encode(Sha1::digest(link.as_str().as_bytes()));
            let file_id = ModuleEntryPath::new(format!("external/{}", hash))
                .into_storeid()
                .map_dbg_err(|_| {
                    format!("Failed to build StoreId for this hash '{:?}'", hash)
                })?;

            debug!("Link    = '{:?}'", link);
            debug!("Hash    = '{:?}'", hash);
            debug!("StoreId = '{:?}'", file_id);

            // Has to be checked before `retrieve()`, because retrieving creates the entry.
            let link_already_exists = store.get(file_id.clone())?.is_some();

            // retrieve the file from the store, which implicitly creates the entry if it does
            // not exist
            let mut file = store
                .retrieve(file_id.clone())
                .map_dbg_err(|_| {
                    format!("Failed to create or retrieve an file for this link '{:?}'", link)
                })?;

            debug!("Generating header content!");
            {
                let hdr = file.deref_mut().get_header_mut();

                // Keep whatever else is stored in the table and only replace the "url" key.
                let mut table = match hdr.read("links.external.content")? {
                    Some(&Value::Table(ref table)) => table.clone(),
                    Some(_) => {
                        warn!("There is a value at 'links.external.content' which is not a table.");
                        warn!("Going to override this value");
                        BTreeMap::new()
                    },
                    None => BTreeMap::new(),
                };

                let v = Value::String(link.into_string());

                debug!("setting URL = '{:?}", v);
                table.insert(String::from("url"), v);

                let _ = hdr.insert("links.external.content", Value::Table(table))?;
                debug!("Setting URL worked");
            }

            // then add an internal link to the new file or return an error if this fails
            let _ = self.add_internal_link(file.deref_mut())?;
            debug!("Added internal link");

            // Only entries which did not exist before this call are reported to the caller.
            if !link_already_exists {
                created.push(file_id);
            }
        }

        Ok(created)
    }

    /// Add an external link to the implementor object
    ///
    /// Reads all current external links, appends `link` and passes the resulting list to
    /// `set_external_links()`.
    ///
    /// # Return Value
    ///
    /// (See ExternalLinker::set_external_links())
    ///
    /// Returns the StoreIds of the link entries which were newly created by this call. If the
    /// link entry for `link` existed in the store before, an empty vec![] is returned.
    ///
    /// # Errors
    ///
    /// Fails if one of the current external links cannot be read or if
    /// `set_external_links()` fails.
    fn add_external_link(&mut self, store: &Store, link: Url) -> Result<Vec<StoreId>> {
        // get external links, add this one, save them
        debug!("Getting links");
        let mut links = self
            .get_external_links(store)?
            .collect::<Result<Vec<_>>>()?;

        debug!("Adding link = '{:?}' to links = {:?}", link, links);
        links.push(link);

        debug!("Setting {} links = {:?}", links.len(), links);
        self.set_external_links(store, links)
    }

    /// Remove an external link from the implementor object
    ///
    /// Reads all current external links, drops every link equal to `link` and passes the
    /// remaining links to `set_external_links()`.
    ///
    /// NOTE(review): `set_external_links()` only adds internal links and never removes one, so
    /// the link entry for `link` apparently stays linked to this entry. Unlinking would need a
    /// removal counterpart of `add_internal_link()`, which is not visible from this file.
    ///
    /// # Return Value
    ///
    /// (See ExternalLinker::set_external_links())
    ///
    /// Returns the StoreIds of the link entries which were newly created by this call, which is
    /// normally an empty vec![] as all remaining links were read from existing entries.
    ///
    /// # Errors
    ///
    /// Fails if one of the current external links cannot be read (such links are no longer
    /// skipped silently, matching `add_external_link()`) or if `set_external_links()` fails.
    fn remove_external_link(&mut self, store: &Store, link: Url) -> Result<Vec<StoreId>> {
        // get external links, remove this one, save them
        let current = self.get_external_links(store)?;

        debug!("Removing link = '{:?}'", link);
        let mut remaining = current.collect::<Result<Vec<_>>>()?;
        remaining.retain(|l| l.as_str() != link.as_str());

        self.set_external_links(store, remaining)
    }

}

#[cfg(test)]
mod tests {
    use super::*;
    use std::path::PathBuf;
    use std::sync::Arc;

    use libimagstore::store::Store;

    /// Initialize `env_logger`; the result is ignored, so repeated calls are harmless.
    fn setup_logging() {
        let _ = ::env_logger::try_init();
    }

    /// Build a store rooted at "/" on top of the in-memory file abstraction.
    pub fn get_store() -> Store {
        use libimagstore::file_abstraction::InMemoryFileAbstraction;

        let root    = PathBuf::from("/");
        let backend = Arc::new(InMemoryFileAbstraction::default());

        Store::new_with_backend(root, &None, backend).unwrap()
    }


    /// Adding one external link yields exactly that link when reading the links back.
    #[test]
    fn test_simple() {
        setup_logging();

        let store     = get_store();
        let url       = Url::parse("http://google.de").unwrap();
        let mut entry = store.retrieve(PathBuf::from("base-test_simple")).unwrap();

        let added = entry.add_external_link(&store, url.clone());
        assert!(added.is_ok());

        let number_of_links = entry.get_external_links(&store).unwrap().count();
        assert_eq!(1, number_of_links);

        let first_link = entry
            .get_external_links(&store)
            .unwrap()
            .next()
            .unwrap()
            .unwrap();
        assert_eq!(url, first_link);
    }

}