use mediatype::MediaTypeBuf;
use serde::{Serialize, Deserialize};
use crate::crawling::File;
use crate::summary::TextPile;
use crate::time::UtcTimestamp;
use crate::url::UrlWithoutFragment;
/// Serializable summary record for a single file.
///
/// Every field except `description` is optional, so a summary can be stored
/// even when some pieces of information are unavailable.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FileSummary {
    /// Media type of the file, if known.
    pub mime_type: Option<MediaTypeBuf>,
    /// Size of the file, if known.
    /// NOTE(review): the unit is not visible here (presumably bytes) — confirm.
    pub size: Option<u64>,
    /// Canonical URL of the file, if one is known. The type name suggests the
    /// fragment part is never stored.
    pub canonical_url: Option<UrlWithoutFragment>,
    /// Text associated with the file, if any.
    /// NOTE(review): presumably text extracted from the document — confirm
    /// against `TextPile`.
    pub text: Option<TextPile>,
    /// Link counters for the file, if any were gathered.
    pub link_stats: Option<LinkStatistics>,
    /// Descriptive metadata for the file; always present.
    pub description: DocumentDescription,
}
impl FileSummary {
    /// Assembles a [`FileSummary`] from a [`File`] and separately supplied parts.
    ///
    /// `file` is consumed: its `mime`, `size` and `canonical_url` are moved into
    /// the summary. Because `file.mime` is wrapped in `Some`, the `mime_type`
    /// of the returned summary is never `None`.
    ///
    /// `document_description`, `text_pile` and `link_stats` are stored unchanged
    /// in the `description`, `text` and `link_stats` fields respectively.
    pub fn from_components(
        file: File,
        document_description: DocumentDescription,
        text_pile: Option<TextPile>,
        link_stats: Option<LinkStatistics>,
    ) -> FileSummary {
        FileSummary {
            mime_type: Some(file.mime),
            size: file.size,
            canonical_url: file.canonical_url,
            text: text_pile,
            link_stats,
            description: document_description,
        }
    }
}
/// Group of link counters attached to a file summary.
///
/// All fields are private, so code outside this module cannot read or set
/// them directly. The derived `Default` yields a value with every counter
/// at zero.
///
/// NOTE(review): the per-field descriptions below are inferred from the field
/// names only — confirm them against the code that fills in these counters.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct LinkStatistics {
    /// Presumably links pointing at this file from the same site.
    incoming_internal_links: u64,
    /// Presumably links pointing at this file from other sites.
    incoming_external_links: u64,
    /// Presumably links from this file to the same site.
    outgoing_internal_links: u64,
    /// Presumably links from this file to other sites.
    outgoing_external_links: u64,
    /// Presumably same-site links that exist in both directions.
    mutual_internal_links: u64,
    /// Presumably cross-site links that exist in both directions.
    mutual_external_links: u64,
    /// Presumably same-site links known to lead nowhere.
    known_dead_internal_links: u64,
    /// Presumably cross-site links known to lead nowhere.
    known_dead_external_links: u64,
}
/// Descriptive metadata about a document.
///
/// The derived `Default` leaves every optional field as `None` and sets
/// `indexiness` to `0`.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct DocumentDescription {
    /// Title of the document, if one is known.
    pub title: Option<String>,
    /// Primary headline of the document, if one is known.
    pub primary_headline: Option<String>,
    /// Free-text description of the document, if one is known.
    pub description: Option<String>,
    /// Primary language of the document, if known.
    /// NOTE(review): the string format (e.g. a language tag) is not visible
    /// here — confirm with the producer of this value.
    pub primary_language: Option<String>,
    /// Publication timestamp, if known.
    pub date_published: Option<UtcTimestamp>,
    /// Timestamp of the last update, if known.
    pub date_last_updated: Option<UtcTimestamp>,
    /// Signed score attached to the document.
    /// NOTE(review): its meaning and scale are not visible in this file —
    /// document them where the value is computed.
    pub indexiness: i64,
}