use crate::error::PayloadError;
use crate::manifest::Manifest;
use crate::metadata::{Metadata, MetadataFile, MetadataFileError, KEY_ENCODING, KEY_VERSION};
use crate::{BagIt, ChecksumAlgorithm};
use digest::Digest;
use std::path::Path;
use tokio::fs;
#[derive(thiserror::Error, Debug, PartialEq)]
pub enum BagDeclarationError {
#[error("Missing `bagit.txt` file")]
Missing,
#[error(transparent)]
Metadata(#[from] MetadataFileError),
#[error("Wrong tag {0}")]
Tag(&'static str),
#[error("Wrong number of tags for `bagit.txt` file")]
NumberTags,
}
#[derive(thiserror::Error, Debug, PartialEq)]
pub enum ReadError {
#[error("Path is not a directory")]
NotDirectory,
#[error("Bag declaration `bagit.txt`: {0}")]
BagDeclaration(#[from] BagDeclarationError),
#[error("Bag info `bag-info.txt`: {0}")]
BagInfo(#[from] MetadataFileError),
#[error("Bag info incorrect Oxum: {0}")]
BagInfoOxum(&'static str),
#[error("Listing checksum files")]
ListChecksumFiles(std::io::ErrorKind),
#[error("Requested algorithm is missing")]
NotRequestedAlgorithm,
#[error("Failed to open file")]
OpenFile(std::io::ErrorKind),
#[error("Failed to read a line in file")]
ReadLine(std::io::ErrorKind),
#[error("Failed to process a line in checksum file: {0}")]
ProcessManifestLine(#[from] PayloadError),
}
impl<'a, 'algo> BagIt<'a, 'algo> {
    /// Opens the bag stored in `bag_it_directory` and checks it against `checksum_algorithm`.
    ///
    /// The checks run in this order:
    ///
    /// 1. `bag_it_directory` must be a directory.
    /// 2. `bagit.txt` must exist and hold exactly two tags: the BagIt version followed by the
    ///    encoding.
    /// 3. `bag-info.txt` is optional; it is parsed when present.
    /// 4. The entries of the bag directory are listed and handed to `Manifest::find_manifest`;
    ///    the payloads come from `get_validate_payloads` on the manifest it returns.
    /// 5. Every `PayloadOctetStreamSummary` tag of `bag-info.txt` must match the number of
    ///    payloads and the sum of their sizes.
    /// 6. When `Manifest::find_tag_manifest` returns a tag manifest, it goes through
    ///    `get_validate_payloads` as well; only a failure matters, the result is discarded.
    ///
    /// On success the returned bag holds the directory path, the payloads, the algorithm and
    /// the tags of `bag-info.txt` (no tags when that file is absent).
    ///
    /// # Errors
    ///
    /// - [`ReadError::NotDirectory`] when `bag_it_directory` is not a directory (or its
    ///   metadata cannot be read).
    /// - [`ReadError::BagDeclaration`] when `bagit.txt` is missing, unreadable, has the wrong
    ///   tags, or has extra tags.
    /// - [`ReadError::BagInfo`] when `bag-info.txt` exists but cannot be read.
    /// - [`ReadError::ListChecksumFiles`] when the bag directory cannot be listed.
    /// - [`ReadError::NotRequestedAlgorithm`] when no manifest matches `checksum_algorithm`.
    /// - [`ReadError::BagInfoOxum`] when a payload summary tag disagrees with the payloads.
    /// - Any error reported by the manifest lookup or by `get_validate_payloads` is propagated.
    pub async fn read_existing<ChecksumAlgo: Digest + 'algo>(
        bag_it_directory: impl AsRef<Path>,
        checksum_algorithm: &'algo ChecksumAlgorithm<ChecksumAlgo>,
    ) -> Result<BagIt<'a, 'algo>, ReadError> {
        let bag_dir = bag_it_directory.as_ref();

        // Filesystem probes go through `tokio::fs` instead of `Path::is_dir` / `Path::exists`,
        // so the `stat` call does not block the executor thread. As with the `std` helpers,
        // any metadata error is treated as "not there".
        let is_directory = fs::metadata(bag_dir)
            .await
            .map(|meta| meta.is_dir())
            .unwrap_or(false);
        if !is_directory {
            return Err(ReadError::NotDirectory);
        }

        // Bag declaration: `bagit.txt`.
        let path_bagit = bag_dir.join("bagit.txt");
        if fs::metadata(&path_bagit).await.is_err() {
            return Err(ReadError::BagDeclaration(BagDeclarationError::Missing));
        }
        let declaration = MetadataFile::read(path_bagit)
            .await
            .map_err(|e| ReadError::BagDeclaration(e.into()))?;

        // The declaration must be exactly: version tag, encoding tag, nothing else.
        let mut declaration_tags = declaration.tags();
        match declaration_tags.next() {
            Some(Metadata::BagitVersion { .. }) => (),
            _ => return Err(BagDeclarationError::Tag(KEY_VERSION).into()),
        }
        match declaration_tags.next() {
            Some(Metadata::Encoding) => (),
            _ => return Err(BagDeclarationError::Tag(KEY_ENCODING).into()),
        }
        if declaration_tags.next().is_some() {
            return Err(BagDeclarationError::NumberTags.into());
        }

        // Optional bag metadata: `bag-info.txt`.
        let path_baginfo = bag_dir.join("bag-info.txt");
        let bag_info = if fs::metadata(&path_baginfo).await.is_ok() {
            Some(
                MetadataFile::read(path_baginfo)
                    .await
                    .map_err(ReadError::BagInfo)?,
            )
        } else {
            None
        };

        // Collect the top-level entries once; both manifest lookups below work on this list.
        let mut dir = fs::read_dir(bag_dir)
            .await
            .map_err(|e| ReadError::ListChecksumFiles(e.kind()))?;
        let mut files_in_dir = Vec::new();
        while let Some(entry) = dir
            .next_entry()
            .await
            .map_err(|e| ReadError::ListChecksumFiles(e.kind()))?
        {
            files_in_dir.push(entry.path());
        }

        // Payload manifest for the requested algorithm; `None` means the bag has no manifest
        // for it.
        let payloads = Manifest::find_manifest(files_in_dir.as_ref(), checksum_algorithm)
            .await?
            .ok_or(ReadError::NotRequestedAlgorithm)?
            .get_validate_payloads::<ChecksumAlgo>(bag_dir)
            .await?;

        // Cross-check every payload summary declared in `bag-info.txt` against what was read.
        if let Some(bag_info) = &bag_info {
            for tag in bag_info.tags() {
                if let Metadata::PayloadOctetStreamSummary {
                    octet_count,
                    stream_count,
                } = tag
                {
                    if *stream_count != payloads.len() {
                        return Err(ReadError::BagInfoOxum("stream_count"));
                    }
                    let payload_bytes_sum = payloads.iter().map(|payload| payload.bytes()).sum();
                    if *octet_count != payload_bytes_sum {
                        return Err(ReadError::BagInfoOxum("octet_count"));
                    }
                }
            }
        }

        // A tag manifest is optional; when present it is run for its error only.
        if let Some(tag_manifest) =
            Manifest::find_tag_manifest(files_in_dir.as_ref(), checksum_algorithm).await?
        {
            tag_manifest
                .get_validate_payloads::<ChecksumAlgo>(bag_dir)
                .await?;
        }

        let tags = bag_info
            .map(|file| file.consume_tags().into_iter().collect())
            .unwrap_or_default();

        Ok(BagIt {
            path: bag_dir.to_path_buf(),
            items: payloads,
            checksum_algorithm: checksum_algorithm.algorithm(),
            tags,
        })
    }
}
#[cfg(test)]
mod test {
    use crate::{
        error::ReadError, metadata::Metadata, Algorithm, BagIt, ChecksumAlgorithm, Payload,
    };
    #[cfg(feature = "date")]
    use jiff::civil::Date;
    use md5::Md5;
    use sha2::Sha256;

    /// Resolves `relative` against the crate root (`CARGO_MANIFEST_DIR`).
    fn crate_path(relative: &str) -> std::path::PathBuf {
        std::path::Path::new(env!("CARGO_MANIFEST_DIR")).join(relative)
    }

    /// Reading the sample bag with SHA-256 yields the expected payloads and `bag-info.txt` tags.
    #[tokio::test]
    async fn bag_with_date_sha256() {
        let sample_bag = crate_path("tests/sample-bag");
        let sha256 = ChecksumAlgorithm::<Sha256>::new(Algorithm::Sha256);

        // Expected payloads: relative path, SHA-256 checksum, size in bytes.
        let expected_payloads = vec![
            Payload::test_payload(
                "data/bagit.md",
                "eccdbbade12ba878af8f2140cb00c914f427405a987de2670e5c3014faf59f8e",
                6302,
            ),
            Payload::test_payload(
                "data/paper_bag.jpg",
                "2b22a8fd0dc46cbdc7a67b6cf588a03a8dd6f8ea23ce0b02e921ca5d79930bb2",
                19895,
            ),
            Payload::test_payload(
                "data/rfc8493.txt",
                "4964147d2e6e16442d4a6dbfbe68178a8f33c3e791c06d68a8b33f51ad821537",
                48783,
            ),
            Payload::test_payload(
                "data/sources.csv",
                "0fe3bd6e7c36aa2c979f3330037b220c5ca88ed0eabf16622202dc0b33c44e72",
                369,
            ),
            Payload::test_payload(
                "data/totebag.jpg",
                "38ff57167d746859f6383e80eb84ec0dd84de2ab1ed126ad317e73fbf502fb31",
                10417,
            ),
        ];

        // The bagging date is a typed tag with the `date` feature, a custom tag without it.
        let expected_tags = vec![
            #[cfg(feature = "date")]
            Metadata::BaggingDate(Date::new(2024, 7, 11).unwrap()),
            #[cfg(not(feature = "date"))]
            Metadata::Custom {
                key: "Bagging-Date".into(),
                value: "2024-07-11".into(),
            },
            Metadata::PayloadOctetStreamSummary {
                octet_count: 85766,
                stream_count: 5,
            },
        ];

        let actual = BagIt::read_existing(&sample_bag, &sha256).await.unwrap();
        let expected = BagIt::from_existing_items(
            sample_bag,
            expected_payloads,
            sha256.algorithm(),
            expected_tags,
        )
        .unwrap();

        assert_eq!(actual, expected);
    }

    /// Requesting MD5 on the sample bag fails with `NotRequestedAlgorithm`.
    #[tokio::test]
    async fn basic_bag_wrong_algorithm_md5() {
        let sample_bag = crate_path("tests/sample-bag/");
        let md5 = ChecksumAlgorithm::<Md5>::new(Algorithm::Custom("md5"));

        let outcome = BagIt::read_existing(&sample_bag, &md5).await;

        assert_eq!(outcome, Err(ReadError::NotRequestedAlgorithm));
    }
}