es3 0.1.0

Library for parsing, extracting, and verifying ES3 dossier files
Documentation
use std::fmt;

use serde::{Deserialize, Serialize};

use crate::parsed::{ParsedDocument, ValidationMessage, ValidationSeverity};
use crate::transform::{ExtractionUnavailableReason, Transform, TransformChain};

#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct MimeType {
    #[serde(rename = "type")]
    top_level_type: String,
    subtype: String,
    extension: Option<String>,
    charset: Option<String>,
}

impl MimeType {
    pub fn top_level_type(&self) -> &str {
        &self.top_level_type
    }

    pub fn subtype(&self) -> &str {
        &self.subtype
    }

    pub fn extension(&self) -> Option<&str> {
        self.extension.as_deref()
    }

    pub fn charset(&self) -> Option<&str> {
        self.charset.as_deref()
    }

    pub(crate) fn from_node(node: roxmltree::Node<'_, '_>) -> Self {
        Self {
            top_level_type: node.attribute("type").unwrap_or_default().to_owned(),
            subtype: node.attribute("subtype").unwrap_or_default().to_owned(),
            extension: non_empty_attribute(node, "extension"),
            charset: non_empty_attribute(node, "charset")
                .or_else(|| non_empty_attribute(node, "charSet")),
        }
    }
}

impl fmt::Display for MimeType {
    fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(formatter, "{}/{}", self.top_level_type, self.subtype)
    }
}

#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct DocumentEntry {
    index: usize,
    title: String,
    mime_type: MimeType,
    source_size: u64,
    transforms: Vec<Transform>,
    extraction: ExtractionCapability,
    signature_count: usize,
    timestamp_count: usize,
}

impl DocumentEntry {
    pub fn index(&self) -> usize {
        self.index
    }

    pub fn title(&self) -> &str {
        &self.title
    }

    pub fn mime_type(&self) -> &MimeType {
        &self.mime_type
    }

    pub fn source_size(&self) -> u64 {
        self.source_size
    }

    pub fn transforms(&self) -> &[Transform] {
        &self.transforms
    }

    pub fn extraction(&self) -> &ExtractionCapability {
        &self.extraction
    }

    pub fn signature_count(&self) -> usize {
        self.signature_count
    }

    pub fn timestamp_count(&self) -> usize {
        self.timestamp_count
    }

    pub fn suggested_filename(&self) -> String {
        crate::output::filename_for(self)
    }

    pub fn can_extract(&self) -> bool {
        self.extraction.can_extract()
    }

    pub fn unavailable_reason(&self) -> Option<&str> {
        self.extraction.unavailable_reason()
    }

    pub fn unavailable_reason_code(&self) -> Option<ExtractionUnavailableReason> {
        self.extraction.unavailable_reason_code()
    }
}

#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct ExtractionCapability {
    can_extract: bool,
    unavailable_reason: Option<String>,
    unavailable_reason_code: Option<ExtractionUnavailableReason>,
}

impl ExtractionCapability {
    pub fn can_extract(&self) -> bool {
        self.can_extract
    }

    pub fn unavailable_reason(&self) -> Option<&str> {
        self.unavailable_reason.as_deref()
    }

    pub fn unavailable_reason_code(&self) -> Option<ExtractionUnavailableReason> {
        self.unavailable_reason_code
    }

    pub(crate) fn from_chain(transform_chain: &TransformChain) -> Self {
        let support = transform_chain.extraction_support();
        let unavailable_reason_code = support.unavailable_reason_code();

        Self {
            can_extract: support.can_extract(),
            unavailable_reason: unavailable_reason_code.map(|reason| reason.message().to_owned()),
            unavailable_reason_code,
        }
    }
}

#[derive(Debug, Clone)]
pub(crate) struct StoredDocument {
    pub(crate) entry: DocumentEntry,
    pub(crate) payload_text: String,
    pub(crate) transform_chain: TransformChain,
    pub(crate) structural_warnings: Vec<ValidationMessage>,
}

impl StoredDocument {
    pub(crate) fn from_parsed(document: ParsedDocument) -> Self {
        let index = document.index;
        let profile = document
            .profile()
            .expect("validated parsed document must have a profile");
        let title = profile
            .title()
            .expect("validated parsed document profile must have a title")
            .to_owned();
        let mime_type = profile
            .mime_type()
            .expect("validated parsed document profile must have a MIME type")
            .clone();
        let source_size = profile
            .source_size()
            .expect("validated parsed document profile must have a valid source size");
        let payload_text = document
            .payload_text()
            .expect("validated parsed document must have payload text")
            .to_owned();
        let transforms = document.valid_transforms();
        let transform_chain = TransformChain::try_from(transforms.as_slice())
            .expect("validated parsed document must have a valid transform chain");
        let extraction = ExtractionCapability::from_chain(&transform_chain);
        let structural_warnings = document
            .validation_messages()
            .into_iter()
            .filter(|message| message.severity == ValidationSeverity::Warning)
            .collect();

        Self {
            entry: DocumentEntry {
                index,
                title,
                mime_type,
                source_size,
                transforms,
                extraction,
                signature_count: document.signature_count,
                timestamp_count: document.timestamp_count,
            },
            payload_text,
            transform_chain,
            structural_warnings,
        }
    }
}

#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ExtractedDocument {
    pub entry: DocumentEntry,
    pub filename: String,
    pub bytes: Vec<u8>,
}

fn non_empty_attribute(node: roxmltree::Node<'_, '_>, name: &str) -> Option<String> {
    node.attribute(name)
        .map(str::trim)
        .filter(|value| !value.is_empty())
        .map(ToOwned::to_owned)
}