use std::borrow::Cow;
use std::collections::hash_map::Iter;
use std::collections::{BTreeMap, HashMap, HashSet};
use std::convert::{TryFrom, TryInto};
use std::mem;
use std::rc::Rc;
use std::str::FromStr;
use chrono::{DateTime, Local};
use globset::GlobBuilder;
use once_cell::unsync::OnceCell;
use serde::{Deserialize, Serialize};
use crate::ocfl::bimap::PathBiMap;
use crate::ocfl::consts::DEFAULT_CONTENT_DIR;
use crate::ocfl::digest::{DigestAlgorithm, HexDigest};
use crate::ocfl::error::{not_found, not_found_path, Result, RocflError};
use crate::ocfl::{
validate, CommitMeta, ContentPath, ContentPathVersion, Diff, InventoryPath, LogicalPath,
SpecVersion, VersionNum, VersionRef,
};
/// Commit message given to every version built by `Version::staged_version`.
const STAGING_MESSAGE: &str = "Staging new version";
/// User name given to every version built by `Version::staged_version`.
const ROCFL_USER: &str = "rocfl";
/// User address given to every version built by `Version::staged_version`.
const ROCFL_ADDRESS: &str = "https://github.com/pwinckles/rocfl";
/// In-memory model of an object's inventory.
///
/// Serialized with camelCase field names. `object_root`, `storage_path` and
/// `mutable_head` are skipped during serialization.
#[derive(Serialize, Debug)]
#[serde(rename_all = "camelCase")]
pub struct Inventory {
/// Object id; checked by `validate::validate_object_id` in `Inventory::new`.
pub id: String,
/// Inventory type string, serialized as `type`. `spec_version` parses it.
#[serde(rename = "type")]
pub type_declaration: String,
/// Digest algorithm; checked by `validate::validate_digest_algorithm` in `Inventory::new`.
pub digest_algorithm: DigestAlgorithm,
/// Key into `versions` of the HEAD version.
pub head: VersionNum,
/// Content directory name. When `None`, `defaulted_content_dir` falls back to
/// `DEFAULT_CONTENT_DIR` and the field is omitted from serialized output.
#[serde(skip_serializing_if = "Option::is_none")]
pub content_directory: Option<String>,
/// Two-way mapping between digests and content paths.
manifest: PathBiMap<ContentPath>,
/// All versions, ordered by version number.
pub versions: BTreeMap<VersionNum, Version>,
/// Optional fixity block: algorithm name -> digest -> paths (the nesting
/// `invert_fixity` walks). Omitted from serialized output when `None`.
#[serde(skip_serializing_if = "Option::is_none")]
fixity: Option<HashMap<String, HashMap<String, Vec<String>>>>,
/// Not serialized; empty after `Inventory::new`.
/// NOTE(review): presumably filled in by whoever loads the inventory — confirm.
#[serde(skip)]
pub object_root: String,
/// Not serialized; empty after `Inventory::new`.
/// NOTE(review): presumably filled in by whoever loads the inventory — confirm.
#[serde(skip)]
pub storage_path: String,
/// Not serialized; `false` after `Inventory::new`.
#[serde(skip)]
pub mutable_head: bool,
}
/// Builder for a new [`Inventory`]; see `InventoryBuilder::new` for the defaults.
pub struct InventoryBuilder {
/// Object id passed on to `Inventory::new`.
id: String,
/// Inventory type string derived from the `SpecVersion` given to `new`.
type_declaration: String,
/// Digest algorithm; `Sha512` unless overridden.
digest_algorithm: DigestAlgorithm,
/// HEAD version number; `VersionNum::v1()` unless overridden.
head: VersionNum,
/// Content directory; `DEFAULT_CONTENT_DIR` unless overridden.
content_directory: String,
/// Manifest; starts empty and is passed on unchanged by `build`.
manifest: PathBiMap<ContentPath>,
/// Versions; starts empty, `build` inserts a staged version at `head`.
versions: BTreeMap<VersionNum, Version>,
}
/// A single version of an object: its creation time, logical state and
/// optional commit metadata.
#[derive(Serialize, Debug, Clone)]
pub struct Version {
/// Creation timestamp.
pub created: DateTime<Local>,
/// Two-way mapping between digests and the logical paths in this version.
state: PathBiMap<LogicalPath>,
/// Commit message; omitted from serialized output when `None`.
#[serde(skip_serializing_if = "Option::is_none")]
pub message: Option<String>,
/// Committing user; omitted from serialized output when `None`.
#[serde(skip_serializing_if = "Option::is_none")]
pub user: Option<User>,
/// Lazily computed set of logical directories implied by the paths in
/// `state` (see `get_logical_dirs`). Never serialized.
#[serde(skip)]
logical_dirs: OnceCell<HashSet<LogicalPath>>,
}
/// The user recorded on a version.
#[derive(Deserialize, Serialize, Debug, Clone, Eq, PartialEq)]
pub struct User {
/// User name. Unlike `address`, this field carries no `skip_serializing_if`
/// attribute.
pub name: Option<String>,
/// User address; omitted from serialized output when `None`.
#[serde(skip_serializing_if = "Option::is_none")]
pub address: Option<String>,
}
impl Inventory {
/// Creates an inventory after validating its core fields.
///
/// Checks run in this order: object id, digest algorithm, content directory
/// (only when one is supplied), and finally that `versions` has an entry for
/// `head`. `object_root` and `storage_path` start out empty and
/// `mutable_head` starts out `false`.
///
/// # Errors
///
/// Propagates any error from the `validate` helpers, and returns
/// `RocflError::CorruptObject` when `versions` has no entry for `head`.
#[allow(clippy::too_many_arguments)]
pub fn new(
    id: String,
    type_declaration: String,
    digest_algorithm: DigestAlgorithm,
    head: VersionNum,
    content_directory: Option<String>,
    manifest: PathBiMap<ContentPath>,
    versions: BTreeMap<VersionNum, Version>,
    fixity: Option<HashMap<String, HashMap<String, Vec<String>>>>,
) -> Result<Self> {
    validate::validate_object_id(&id)?;
    validate::validate_digest_algorithm(digest_algorithm)?;
    if let Some(dir) = content_directory.as_ref() {
        validate::validate_content_dir(dir)?;
    }

    if versions.contains_key(&head) {
        Ok(Self {
            id,
            type_declaration,
            digest_algorithm,
            head,
            content_directory,
            manifest,
            versions,
            fixity,
            object_root: String::new(),
            storage_path: String::new(),
            mutable_head: false,
        })
    } else {
        Err(RocflError::CorruptObject {
            object_id: id,
            message: format!("HEAD version {} was not found", head),
        })
    }
}
/// Returns an [`InventoryBuilder`] for `object_id` and `version`; delegates
/// directly to `InventoryBuilder::new`.
pub fn builder(object_id: &str, version: SpecVersion) -> InventoryBuilder {
InventoryBuilder::new(object_id, version)
}
/// Appends a staged copy of the current HEAD version and makes it the new HEAD.
///
/// The new version is produced by `Version::clone_staged` on the current HEAD
/// and stored under the version number returned by `self.head.next()`.
///
/// # Errors
///
/// Propagates the error from `VersionNum::next`; nothing is modified in that case.
pub fn create_staging_head(&mut self) -> Result<()> {
    let next_head = self.head.next()?;
    let staged = self.head_version().clone_staged();

    self.versions.insert(next_head, staged);
    self.head = next_head;

    Ok(())
}
/// Returns `true` when the HEAD version number is 1.
pub fn is_new(&self) -> bool {
self.head.number == 1
}
/// Parses `type_declaration` into a `SpecVersion`.
///
/// Returns `None` when `SpecVersion::try_from_inventory_type` fails; the
/// error itself is discarded.
pub fn spec_version(&self) -> Option<SpecVersion> {
SpecVersion::try_from_inventory_type(&self.type_declaration).ok()
}
/// Returns the version stored under `head`.
///
/// # Panics
///
/// Panics if `versions` has no entry for `head`. `Inventory::new` verifies the
/// entry exists, but `remove_version` can remove it afterwards.
pub fn head_version(&self) -> &Version {
self.versions.get(&self.head).unwrap()
}
/// Returns a mutable reference to the version stored under `head`.
///
/// # Panics
///
/// Panics if `versions` has no entry for `head`. `Inventory::new` verifies the
/// entry exists, but `remove_version` can remove it afterwards.
pub fn head_version_mut(&mut self) -> &mut Version {
self.versions.get_mut(&self.head).unwrap()
}
/// Looks up the version stored under `version_num`.
///
/// # Errors
///
/// Returns the error built by `not_found` for this object id and version
/// number when no such version exists.
pub fn get_version(&self, version_num: VersionNum) -> Result<&Version> {
    self.versions
        .get(&version_num)
        .ok_or_else(|| not_found(&self.id, Some(version_num)))
}
/// Removes the version stored under `version_num` and returns it.
///
/// `head` is not adjusted, even when the removed version is the HEAD.
///
/// # Errors
///
/// Returns the error built by `not_found` for this object id and version
/// number when no such version exists.
pub fn remove_version(&mut self, version_num: VersionNum) -> Result<Version> {
    self.versions
        .remove(&version_num)
        .ok_or_else(|| not_found(&self.id, Some(version_num)))
}
/// Returns `true` when the manifest contains `content_path`.
pub fn contains_content_path(&self, content_path: &ContentPath) -> bool {
self.manifest.contains_path(content_path)
}
/// Returns the digest the manifest associates with `content_path`, if any.
pub fn digest_for_content_path(&self, content_path: &ContentPath) -> Option<&Rc<HexDigest>> {
self.manifest.get_id(content_path)
}
/// Picks a content path for `digest` that is no newer than `version_num`.
///
/// `version_num` is resolved against the current HEAD. A content path is a
/// candidate when its version is less than or equal to the resolved version;
/// paths in the mutable HEAD are treated as belonging to the HEAD version.
///
/// When there are several candidates and `logical_path` is given, the
/// candidate ending in `/<content dir>/<logical path>` is preferred.
/// Otherwise the first candidate is returned; candidates follow the iteration
/// order of the manifest's path set, so which one comes first is unspecified.
///
/// # Errors
///
/// Returns `RocflError::CorruptObject` when the digest is not in the manifest
/// or when none of its content paths qualifies.
pub fn content_path_for_digest(
    &self,
    digest: &HexDigest,
    version_num: VersionRef,
    logical_path: Option<&LogicalPath>,
) -> Result<&Rc<ContentPath>> {
    let max_version = version_num.resolve(self.head);

    let all_paths = match self.manifest.get_paths(digest) {
        Some(paths) => paths,
        None => {
            return Err(RocflError::CorruptObject {
                object_id: self.id.clone(),
                message: format!("Digest {} not found in manifest", digest),
            })
        }
    };

    let candidates: Vec<&Rc<ContentPath>> = all_paths
        .iter()
        .filter(|path| {
            let introduced_in = match path.version {
                ContentPathVersion::VersionNum(num) => num,
                ContentPathVersion::MutableHead => self.head,
            };
            introduced_in <= max_version
        })
        .collect();

    if candidates.is_empty() {
        return Err(RocflError::CorruptObject {
            object_id: self.id.clone(),
            message: format!("Digest {} is not mapped to any content paths", digest),
        });
    }

    if candidates.len() > 1 {
        if let Some(logical_path) = logical_path {
            let suffix = format!("/{}/{}", self.defaulted_content_dir(), logical_path);
            let preferred = candidates
                .iter()
                .copied()
                .find(|path| path.ends_with(&suffix));
            if let Some(path) = preferred {
                return Ok(path);
            }
        }
    }

    // Non-empty was established above.
    Ok(candidates[0])
}
/// Resolves `logical_path` in the given version to a content path.
///
/// The version reference is resolved against the current HEAD, the logical
/// path's digest is looked up in that version, and the digest is then mapped
/// to a content path with `content_path_for_digest`.
///
/// # Errors
///
/// Returns a not-found error when the version does not exist or does not
/// contain `logical_path`, and propagates errors from
/// `content_path_for_digest`.
pub fn content_path_for_logical_path(
    &self,
    logical_path: &LogicalPath,
    version_num: VersionRef,
) -> Result<&Rc<ContentPath>> {
    let resolved = version_num.resolve(self.head);

    let digest = self
        .get_version(resolved)?
        .lookup_digest(logical_path)
        .ok_or_else(|| {
            RocflError::NotFound(format!(
                "Path {} not found in object {} version {}",
                logical_path, self.id, resolved
            ))
        })?;

    self.content_path_for_digest(digest, resolved.into(), Some(logical_path))
}
/// Returns every content path the manifest maps to `digest`, or `None` when
/// the manifest does not know the digest.
pub fn content_paths(&self, digest: &HexDigest) -> Option<&HashSet<Rc<ContentPath>>> {
self.manifest.get_paths(digest)
}
/// Returns a set holding a clone of every content path in the manifest.
pub fn all_content_paths(&self) -> HashSet<Rc<ContentPath>> {
    let mut all = HashSet::with_capacity(self.manifest.len());
    all.extend(self.manifest.iter().map(|(path, _)| path.clone()));
    all
}
/// Diffs the `right` version against a baseline.
///
/// * `left == Some(right)` yields an empty diff without looking up either version.
/// * `left == Some(v)` diffs `right` against `v`.
/// * `left == None` diffs `right` against its previous version, or against
///   nothing when `right`'s number is not greater than 1.
///
/// # Errors
///
/// Returns a not-found error when a version that must be looked up is missing.
pub fn diff_versions(&self, left: Option<VersionNum>, right: VersionNum) -> Result<Vec<Diff>> {
    let baseline = match left {
        Some(left) if left == right => return Ok(Vec::new()),
        Some(left) => Some(self.get_version(left)?),
        None if right.number > 1 => Some(self.get_version(right.previous().unwrap())?),
        None => None,
    };

    let target = self.get_version(right)?;
    Ok(target.diff(baseline))
}
/// Removes redundant HEAD content paths from the manifest and returns the
/// removed paths.
///
/// Only digests mapped to more than one content path are considered, and only
/// their paths starting with `"<head>/"` are candidates for removal:
///
/// * if every path of the digest is such a HEAD path, all but one are removed;
/// * otherwise all of the digest's HEAD paths are removed.
///
/// Version state is not touched here.
pub fn dedup_head(&mut self) -> Vec<Rc<ContentPath>> {
let mut removed = Vec::new();
// Content paths belonging to the HEAD version start with this prefix.
let prefix = format!("{}/", self.head);
// Digest -> its content paths that live under the HEAD prefix.
let mut matches: HashMap<Rc<HexDigest>, HashSet<Rc<ContentPath>>> = HashMap::new();
for (digest, paths) in self.manifest.iter_id_paths() {
// A digest with a single content path has nothing to deduplicate.
if paths.len() > 1 {
for path in paths {
if path.starts_with(&prefix) {
matches
.entry(digest.clone())
.or_insert_with(HashSet::new)
.insert(path.clone());
}
}
}
}
for (digest, paths) in matches {
let total = self.manifest.get_paths(&digest).unwrap().len();
if total == paths.len() {
// Every copy lives in HEAD: keep the last one iterated, remove the rest.
let mut iter = paths.into_iter().peekable();
while let Some(path) = iter.next() {
if iter.peek().is_some() {
self.manifest.remove_path(&path);
removed.push(path);
}
}
} else {
// A copy exists outside HEAD, so every HEAD copy is redundant.
for path in paths {
self.manifest.remove_path(&path);
removed.push(path);
}
}
}
removed
}
/// Registers a new file in the HEAD version.
///
/// The manifest gains a mapping from `digest` to the content path derived
/// from `logical_path` (see `new_content_path`), and the HEAD version's state
/// gains `logical_path`. If the manifest already knows the digest, its
/// existing `Rc` is reused.
///
/// # Errors
///
/// Returns the error from `Version::validate_non_conflicting` when
/// `logical_path` conflicts with an existing logical file or directory in
/// the HEAD version. The inventory is left unmodified in that case.
pub fn add_file_to_head(&mut self, digest: HexDigest, logical_path: LogicalPath) -> Result<()> {
    // Check for conflicts before touching the manifest; otherwise a rejected
    // path would leave behind a manifest entry that no version state references.
    self.head_version()
        .validate_non_conflicting(&logical_path)?;

    let digest_rc = match self.manifest.get_id_rc(&digest) {
        Some(existing) => existing.clone(),
        None => Rc::new(digest),
    };
    let content_path = self.new_content_path(&logical_path);

    self.manifest
        .insert_rc(digest_rc.clone(), Rc::new(content_path));
    self.head_version_mut().add_file(digest_rc, logical_path)
}
/// Copies a logical file from `src_version_num` into the HEAD version's state
/// under `dst_path`.
///
/// Only version state changes; the manifest is not modified.
///
/// # Errors
///
/// Returns a not-found error when the source version or the source path does
/// not exist, and propagates the conflict error from `Version::add_file`.
pub fn copy_file_to_head(
    &mut self,
    src_version_num: VersionNum,
    src_path: &LogicalPath,
    dst_path: LogicalPath,
) -> Result<()> {
    let digest = self
        .get_version(src_version_num)?
        .lookup_digest(src_path)
        .cloned()
        .ok_or_else(|| not_found_path(&self.id, src_version_num, src_path))?;

    self.head_version_mut().add_file(digest, dst_path)
}
/// Moves a logical file within the HEAD version's state from `src_path` to
/// `dst_path`.
///
/// Only version state changes; the manifest is not modified. Moving a path
/// onto itself is a no-op.
///
/// # Errors
///
/// Returns a not-found error when `src_path` is not in the HEAD version, and
/// propagates the conflict error from `Version::add_file` for `dst_path`.
pub fn move_file_in_head(
    &mut self,
    src_path: &LogicalPath,
    dst_path: LogicalPath,
) -> Result<()> {
    let digest = match self.head_version().lookup_digest(src_path) {
        Some(digest) => digest.clone(),
        None => return Err(not_found_path(&self.id, self.head, src_path)),
    };

    // Without this guard the add below would be followed by removing the very
    // same path, deleting the file from the HEAD state.
    if *src_path == dst_path {
        return Ok(());
    }

    let head = self.head_version_mut();
    head.add_file(digest, dst_path)?;
    head.remove_file(src_path);
    Ok(())
}
/// Moves a file that was added in the HEAD version from `src_path` to
/// `dst_path`, updating both the manifest and the HEAD version's state.
///
/// The content path derived from `src_path` is removed from the manifest and
/// the one derived from `dst_path` is inserted for `digest`. If the manifest
/// already knows the digest, its existing `Rc` is reused. Moving a path onto
/// itself is a no-op.
///
/// # Errors
///
/// Returns the error from `Version::validate_non_conflicting` when `dst_path`
/// conflicts with an existing logical file or directory in the HEAD version.
/// The inventory is left unmodified in that case.
pub fn move_new_in_head_file(
    &mut self,
    digest: HexDigest,
    src_path: &LogicalPath,
    dst_path: LogicalPath,
) -> Result<()> {
    // Without this guard the state entry would be re-added and then removed,
    // leaving a manifest entry for a file no longer present in the HEAD state.
    if *src_path == dst_path {
        return Ok(());
    }

    // Check for conflicts before rewriting the manifest so a rejected
    // destination cannot leave the manifest and state out of sync.
    self.head_version().validate_non_conflicting(&dst_path)?;

    let digest_rc = match self.manifest.get_id_rc(&digest) {
        Some(existing) => existing.clone(),
        None => Rc::new(digest),
    };

    let src_content_path = self.new_content_path(src_path);
    self.manifest.remove_path(&src_content_path);

    let dst_content_path = self.new_content_path(&dst_path);
    self.manifest
        .insert_rc(digest_rc.clone(), Rc::new(dst_content_path));

    let head = self.head_version_mut();
    head.add_file(digest_rc, dst_path)?;
    head.remove_file(src_path);
    Ok(())
}
/// Removes `logical_path` from the HEAD version's state and, when present,
/// the matching HEAD content path from the manifest.
///
/// Returns the removed content path only when the logical path was in the
/// HEAD state *and* the content path derived from it (see `new_content_path`)
/// was in the manifest; otherwise returns `None`. The state entry is removed
/// even when `None` is returned because the manifest had no such content path.
pub fn remove_logical_path_from_head(
    &mut self,
    logical_path: &LogicalPath,
) -> Option<ContentPath> {
    // Nothing to do for the manifest if the path was not in the HEAD state.
    self.head_version_mut().remove_file(logical_path)?;

    let content_path = self.new_content_path(logical_path);
    self.manifest
        .remove_path(&content_path)
        .map(|_| content_path)
}
/// Builds the content path `logical_path` would have in the HEAD version,
/// using the current `head` and `defaulted_content_dir()`.
pub fn new_content_path(&self, logical_path: &LogicalPath) -> ContentPath {
logical_path.to_content_path(self.head, self.defaulted_content_dir())
}
/// Returns the configured content directory, or `DEFAULT_CONTENT_DIR` when
/// none is set.
pub fn defaulted_content_dir(&self) -> &str {
    self.content_directory
        .as_deref()
        .unwrap_or(DEFAULT_CONTENT_DIR)
}
/// Returns a shared reference to the manifest.
pub fn manifest(&self) -> &PathBiMap<ContentPath> {
&self.manifest
}
/// Inverts the fixity block into a map from content path to the
/// `(algorithm, digest)` pairs recorded for that path.
///
/// Returns `None` when the inventory has no fixity block. Algorithm names
/// starting with `blake2b` are skipped, as are names that
/// `DigestAlgorithm::from_str` cannot parse.
///
/// # Panics
///
/// Panics when a fixity path cannot be converted into a `ContentPath`.
#[allow(clippy::type_complexity)]
pub fn invert_fixity(
    &self,
) -> Option<HashMap<ContentPath, Vec<(DigestAlgorithm, Rc<HexDigest>)>>> {
    let fixity = self.fixity.as_ref()?;
    let mut inverted = HashMap::new();

    for (algorithm_name, digests) in fixity {
        if algorithm_name.starts_with("blake2b") {
            continue;
        }
        let algorithm = match DigestAlgorithm::from_str(algorithm_name) {
            Ok(algorithm) => algorithm,
            Err(_) => continue,
        };

        for (digest, paths) in digests {
            // One shared digest per entry; each path gets a cheap `Rc` clone.
            let digest = Rc::new(HexDigest::from(digest.as_str()));
            for path in paths {
                inverted
                    .entry(ContentPath::try_from(path).unwrap())
                    .or_insert_with(Vec::new)
                    .push((algorithm, Rc::clone(&digest)));
            }
        }
    }

    Some(inverted)
}
/// Returns a set holding a clone of every content path in the manifest.
pub fn manifest_paths(&self) -> HashSet<Rc<ContentPath>> {
    let content_paths = self
        .manifest
        .iter()
        .map(|(content_path, _)| content_path.clone());

    let mut paths = HashSet::with_capacity(self.manifest.len());
    paths.extend(content_paths);
    paths
}
/// Returns every path mentioned anywhere in the fixity block, across all
/// algorithms and digests. The set is empty when there is no fixity block.
pub fn fixity_paths(&self) -> HashSet<&str> {
    self.fixity
        .iter()
        .flat_map(|fixity| fixity.values())
        .flat_map(|digests| digests.values())
        .flat_map(|paths| paths.iter())
        .map(|path| path.as_str())
        .collect()
}
}
impl InventoryBuilder {
    /// Starts a builder for `object_id`.
    ///
    /// Defaults: inventory type taken from `version`, SHA-512 digests, HEAD at
    /// `VersionNum::v1()`, content directory `DEFAULT_CONTENT_DIR`, and an
    /// empty manifest and version map.
    pub fn new(object_id: &str, version: SpecVersion) -> Self {
        let id = object_id.to_owned();
        let type_declaration = version.inventory_type().to_string();

        Self {
            id,
            type_declaration,
            digest_algorithm: DigestAlgorithm::Sha512,
            head: VersionNum::v1(),
            content_directory: DEFAULT_CONTENT_DIR.to_owned(),
            manifest: PathBiMap::new(),
            versions: BTreeMap::new(),
        }
    }

    /// Overrides the digest algorithm.
    pub fn with_digest_algorithm(self, digest_algorithm: DigestAlgorithm) -> Self {
        Self {
            digest_algorithm,
            ..self
        }
    }

    /// Overrides the HEAD version number.
    pub fn with_head(self, head: VersionNum) -> Self {
        Self { head, ..self }
    }

    /// Overrides the content directory.
    pub fn with_content_directory(self, content_directory: &str) -> Self {
        Self {
            content_directory: content_directory.to_owned(),
            ..self
        }
    }

    /// Builds the inventory.
    ///
    /// A fresh staged version (`Version::new_staged`) is inserted under the
    /// configured HEAD number, and the result is passed through
    /// `Inventory::new` with no fixity block.
    ///
    /// # Errors
    ///
    /// Propagates any validation error from `Inventory::new`.
    pub fn build(self) -> Result<Inventory> {
        let Self {
            id,
            type_declaration,
            digest_algorithm,
            head,
            content_directory,
            manifest,
            mut versions,
        } = self;

        versions.insert(head, Version::new_staged());

        Inventory::new(
            id,
            type_declaration,
            digest_algorithm,
            head,
            Some(content_directory),
            manifest,
            versions,
            None,
        )
    }
}
impl Version {
/// Creates a version from its parts. The logical-directory cache starts out
/// uninitialised and is computed on first use.
pub fn new(
created: DateTime<Local>,
state: PathBiMap<LogicalPath>,
message: Option<String>,
user: Option<User>,
) -> Self {
Self {
created,
state,
message,
user,
logical_dirs: Default::default(),
}
}
/// Creates a staged version with an empty state (see `staged_version`).
pub fn new_staged() -> Self {
Self::staged_version(PathBiMap::new())
}
/// Creates a staged version whose state is a clone of this version's state
/// (see `staged_version`). Message, user and creation time are not copied.
pub fn clone_staged(&self) -> Self {
Self::staged_version(self.state.clone())
}
fn staged_version(state: PathBiMap<LogicalPath>) -> Self {
Self {
created: Local::now(),
message: Some(STAGING_MESSAGE.to_string()),
user: Some(User {
name: Some(ROCFL_USER.to_string()),
address: Some(ROCFL_ADDRESS.to_string()),
}),
state,
logical_dirs: OnceCell::default(),
}
}
/// Replaces this version's commit metadata with the values in `meta`.
///
/// The user is set only when `meta.user_name` is present; a user address
/// without a name is dropped. A missing `created` defaults to the current
/// local time.
pub fn update_meta(&mut self, meta: CommitMeta) {
    // Pulled out first so the closure below captures only the address.
    let address = meta.user_address;

    self.message = meta.message;
    self.user = meta.user_name.map(|name| User::new(name, address));
    self.created = meta.created.unwrap_or_else(Local::now);
}
/// Iterates over the `(logical path, digest)` pairs of this version's state.
pub fn state_iter(&self) -> Iter<Rc<LogicalPath>, Rc<HexDigest>> {
self.state.iter()
}
/// Returns a set holding a clone of every logical path in this version's state.
pub fn logical_paths(&self) -> HashSet<Rc<LogicalPath>> {
    let mut all = HashSet::with_capacity(self.state.len());
    all.extend(self.state.iter().map(|(path, _)| Rc::clone(path)));
    all
}
/// Takes the state out of this version, leaving an empty state behind.
///
/// An initialised logical-directory cache is reset, since it was computed
/// from the state being removed.
pub fn remove_state(&mut self) -> PathBiMap<LogicalPath> {
if self.logical_dirs.get().is_some() {
self.logical_dirs = OnceCell::default();
}
mem::replace(&mut self.state, PathBiMap::new())
}
/// Returns the digest recorded for `logical_path` in this version, if any.
pub fn lookup_digest(&self, logical_path: &LogicalPath) -> Option<&Rc<HexDigest>> {
self.state.get_id(logical_path)
}
/// Returns `true` when `path` is a logical file or a logical directory in
/// this version.
pub fn exists(&self, path: &LogicalPath) -> bool {
self.is_file(path) || self.is_dir(path)
}
/// Returns `true` when `path` is a logical file, i.e. a path in the state.
pub fn is_file(&self, path: &LogicalPath) -> bool {
self.state.contains_path(path)
}
/// Returns `true` when `path` is one of the logical directories of this
/// version. Initialises the logical-directory cache on first use.
pub fn is_dir(&self, path: &LogicalPath) -> bool {
self.get_logical_dirs().contains(path)
}
/// Returns `true` when this version's state references `digest`.
pub fn contains_digest(&self, digest: &HexDigest) -> bool {
self.state.contains_id(digest)
}
/// Checks that `path` could be added as a logical file without clashing with
/// the existing layout of this version.
///
/// # Errors
///
/// Returns `RocflError::IllegalState` when `path` is already in use as a
/// logical directory, or when one of its ancestor directories is an existing
/// logical file.
pub fn validate_non_conflicting(&self, path: &LogicalPath) -> Result<()> {
    if self.is_dir(path) {
        return Err(RocflError::IllegalState(format!(
            "Conflicting logical path {}: This path is already in use as a directory",
            path
        )));
    }

    let conflicting_file = create_logical_dirs(path)
        .into_iter()
        .find(|dir| self.is_file(dir));

    match conflicting_file {
        Some(dir) => Err(RocflError::IllegalState(format!(
            "Conflicting logical path {}: The path part {} is an existing logical file",
            path, dir
        ))),
        None => Ok(()),
    }
}
/// Returns the logical files of this version matched by `glob`.
///
/// Leading `/` characters are stripped from the glob; an empty result is
/// replaced by `*`. The glob is compiled with literal separators and
/// backslash escapes enabled.
///
/// With `recursive` set, every file beneath a matching logical directory is
/// included as well. When the glob ends in `/`, directories are matched with
/// a trailing `/` appended; otherwise they are matched as-is.
///
/// # Errors
///
/// Propagates the error from building the glob.
pub fn resolve_glob(&self, glob: &str, recursive: bool) -> Result<HashSet<Rc<LogicalPath>>> {
    let pattern = match glob.trim_start_matches('/') {
        "" => "*",
        trimmed => trimmed,
    };
    let dir_pattern = pattern.ends_with('/');

    let matcher = GlobBuilder::new(pattern)
        .literal_separator(true)
        .backslash_escape(true)
        .build()?
        .compile_matcher();

    let mut matches: HashSet<Rc<LogicalPath>> = self
        .state
        .iter()
        .filter(|(path, _)| matcher.is_match(path.as_path()))
        .map(|(path, _)| Rc::clone(path))
        .collect();

    if recursive {
        for dir in self.get_logical_dirs() {
            let dir_matches = if dir_pattern {
                matcher.is_match(format!("{}/", dir))
            } else {
                matcher.is_match(dir.as_path())
            };

            if dir_matches {
                matches.extend(self.paths_with_prefix(dir.as_str()));
            }
        }
    }

    Ok(matches)
}
/// Returns the logical directories of this version matched by `glob`.
///
/// Leading `/` characters are stripped from the glob, which is compiled with
/// literal separators and backslash escapes enabled.
///
/// # Errors
///
/// Propagates the error from building the glob.
pub fn resolve_glob_to_dirs(&self, glob: &str) -> Result<HashSet<&LogicalPath>> {
    let pattern = glob.trim_start_matches('/');
    let matcher = GlobBuilder::new(pattern)
        .literal_separator(true)
        .backslash_escape(true)
        .build()?
        .compile_matcher();

    let matches: HashSet<&LogicalPath> = self
        .get_logical_dirs()
        .iter()
        .filter(|dir| matcher.is_match(dir.as_str()))
        .collect();

    Ok(matches)
}
/// Returns the logical files whose path starts with `prefix`, treated as a
/// directory prefix.
///
/// A non-empty prefix that does not end in `/` has one appended before
/// matching. An empty prefix is used unchanged.
pub fn paths_with_prefix(&self, prefix: &str) -> Vec<Rc<LogicalPath>> {
    let needs_separator = !prefix.is_empty() && !prefix.ends_with('/');
    let prefix: Cow<str> = if needs_separator {
        Cow::Owned(format!("{}/", prefix))
    } else {
        Cow::Borrowed(prefix)
    };

    self.state
        .iter()
        .filter(|(path, _)| path.starts_with(prefix.as_ref()))
        .map(|(path, _)| Rc::clone(path))
        .collect()
}
/// Computes the differences that turn `other` (the left side) into `self`
/// (the right side).
///
/// With `other == None`, every path in `self` is reported as `Diff::Added`.
/// Otherwise:
///
/// * a path in both versions with different digests is `Diff::Modified`;
/// * a path only in the left version is a delete candidate, grouped by digest;
/// * a path only in the right version whose digest matches a delete candidate
///   group turns that group into a `Diff::Renamed`; further right-only paths
///   with the same digest are appended to that rename;
/// * remaining right-only paths are `Diff::Added`, remaining delete
///   candidates are `Diff::Deleted`.
///
/// The path lists inside each rename are sorted; the order of the returned
/// diffs themselves follows map iteration order.
pub fn diff(&self, other: Option<&Version>) -> Vec<Diff> {
let mut diffs = Vec::new();
// Left-only paths grouped by digest; entries are consumed when a rename is found.
let mut deletes: HashMap<Rc<HexDigest>, Vec<Rc<LogicalPath>>> = HashMap::new();
if let Some(left) = other {
// Paths present on both sides; these are skipped in the right-side pass.
let mut seen = HashSet::with_capacity(left.state.len());
for (path, left_digest) in left.state_iter() {
match self.lookup_digest(path) {
None => {
deletes
.entry(left_digest.clone())
.or_insert_with(Vec::new)
.push(path.clone());
}
Some(right_digest) => {
seen.insert(path.clone());
if left_digest != right_digest {
diffs.push(Diff::Modified(path.clone()))
}
}
}
}
let mut renames: HashMap<Rc<HexDigest>, Diff> = HashMap::new();
for (path, digest) in self.state_iter() {
if seen.contains(path) {
continue;
}
if let Some(original) = deletes.remove(digest) {
// First right-only path with this digest: all deleted paths with the
// digest become the originals of a rename.
let renamed = vec![path.clone()];
renames.insert(digest.clone(), Diff::Renamed { original, renamed });
} else if let Some(Diff::Renamed {
original: _,
renamed,
}) = renames.get_mut(digest)
{
// Additional right-only path with a digest that is already a rename.
renamed.push(path.clone());
} else {
diffs.push(Diff::Added(path.clone()));
}
}
// Whatever was not claimed by a rename is a real delete.
for (_digest, deletes) in deletes {
for delete in deletes {
diffs.push(Diff::Deleted(delete));
}
}
for (_digest, mut rename) in renames {
if let Diff::Renamed { original, renamed } = &mut rename {
original.sort_unstable();
renamed.sort_unstable();
}
diffs.push(rename);
}
} else {
for (path, _digest) in self.state_iter() {
diffs.push(Diff::Added(path.clone()));
}
}
diffs
}
/// Adds `logical_path` with `digest` to this version's state.
///
/// # Errors
///
/// Returns the error from `validate_non_conflicting` when the path clashes
/// with an existing logical file or directory; the state is unchanged then.
fn add_file(&mut self, digest: Rc<HexDigest>, logical_path: LogicalPath) -> Result<()> {
self.validate_non_conflicting(&logical_path)?;
// Keep an already-initialised directory cache in step with the new path
// instead of discarding it.
if let Some(dirs) = self.logical_dirs.get_mut() {
dirs.extend(create_logical_dirs(&logical_path));
}
self.state.insert_rc(digest, Rc::new(logical_path));
Ok(())
}
/// Removes `path` from this version's state and returns the removed
/// `(path, digest)` pair, or `None` when the path was not present.
///
/// An initialised logical-directory cache is reset unconditionally, even when
/// the path turns out not to be present.
/// NOTE(review): presumably because a removal can leave directories empty and
/// recomputing is simpler than pruning — confirm.
fn remove_file(&mut self, path: &LogicalPath) -> Option<(Rc<LogicalPath>, Rc<HexDigest>)> {
if self.logical_dirs.get().is_some() {
self.logical_dirs = OnceCell::default();
}
self.state.remove_path(path)
}
/// Returns the set of logical directories of this version, computing and
/// caching it on first use.
///
/// The set always contains the path built from `"/"`, plus every ancestor
/// directory of every logical file in the state.
///
/// # Panics
///
/// Panics if `"/"` cannot be converted into a `LogicalPath`.
fn get_logical_dirs(&self) -> &HashSet<LogicalPath> {
    self.logical_dirs.get_or_init(|| {
        let root: LogicalPath = "/".try_into().unwrap();

        let mut dirs = HashSet::with_capacity(self.state.len());
        dirs.insert(root);
        dirs.extend(
            self.state
                .iter()
                .flat_map(|(path, _)| create_logical_dirs(path)),
        );
        dirs
    })
}
}
impl User {
/// Creates a user with the given name and optional address. The name is
/// always stored as `Some(name)`.
pub fn new(name: String, address: Option<String>) -> Self {
Self {
name: Some(name),
address,
}
}
}
/// Collects every ancestor directory of `path`.
///
/// Starting from `path.parent()`, each parent is added until an empty path is
/// reached. The empty path itself is not included, and `parent()` is never
/// called on it.
fn create_logical_dirs(path: &LogicalPath) -> HashSet<LogicalPath> {
    let mut ancestors = HashSet::new();
    let mut current = path.parent();

    loop {
        if current.is_empty() {
            break;
        }
        let grandparent = current.parent();
        ancestors.insert(current);
        current = grandparent;
    }

    ancestors
}