1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
use mediatype::MediaTypeBuf;
use serde::{Serialize,Deserialize};
use crate::database::id::{CrawlLogEntryId, RequestId};
use crate::time::UtcTimestamp;
use crate::url::UrlWithoutFragment;
/// Metadata describing a single file.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct File {
    /// Numeric id of the corresponding entry in the crawl_log.
    ///
    /// Marked `#[serde(skip)]`, so it is not part of the serialized form.
    #[serde(skip)]
    pub crawl_log_entry: CrawlLogEntryId,

    /// Numeric id of the request that produced this file
    /// (if it was fetched over a network).
    ///
    /// Marked `#[serde(skip)]`, so it is not part of the serialized form.
    #[serde(skip)]
    pub request_id: Option<RequestId>,

    /// The url from which an up to date copy of the file can be obtained.
    pub url: UrlWithoutFragment,

    /// Time of the last modification, as reported by the file's metadata.
    ///
    /// The value may originate from filesystem metadata, archive metadata,
    /// the HTTP `last-modified` header or a similar source.
    pub last_modified: Option<UtcTimestamp>,

    // ---------------------------------------------- File specific part

    /// The mimetype of the file.
    pub mime: MediaTypeBuf,

    /// The size of the file, if known.
    pub size: Option<u64>,

    /// The url this file claimed as its canonical version, set when the file
    /// declared itself to be a non-canonical version of another resource.
    /// (This implies that the file contents are unlikely to be indexed.)
    ///
    /// See [RFC 6596](https://www.rfc-editor.org/rfc/rfc6596).
    pub canonical_url: Option<UrlWithoutFragment>,
}