use std::borrow::{Borrow, Cow};
use std::collections::{BTreeMap, BTreeSet, HashMap};
use std::fmt::Write;
use std::num::NonZeroU32;
use std::rc::Rc;
use crate::objectsource::{ContentID, ObjectMeta, ObjectMetaMap, ObjectSourceMeta};
use crate::objgv::*;
use anyhow::{anyhow, Result};
use camino::Utf8PathBuf;
use gvariant::aligned_bytes::TryAsAligned;
use gvariant::{Marker, Structure};
use ostree::{gio, glib};
use serde::{Deserialize, Serialize};
/// Upper bound on the number of chunks (layers) generated by default.
pub(crate) const MAX_CHUNKS: u32 = 64;
/// Cheaply clonable shared string; this module is single-threaded, so `Rc` suffices.
type RcStr = Rc<str>;
/// Maps an object checksum to its size plus every filesystem path that references it.
pub(crate) type ChunkMapping = BTreeMap<RcStr, (u64, Vec<Utf8PathBuf>)>;
/// A single chunk: a named set of content objects with a running size total.
#[derive(Debug, Default)]
pub(crate) struct Chunk {
// Human-readable name for this chunk (derived from component names).
pub(crate) name: String,
// Object checksum -> (size, paths) for every object assigned to this chunk.
pub(crate) content: ChunkMapping,
// Sum of the sizes of the objects in `content` (each object counted once).
pub(crate) size: u64,
}
/// Source metadata for a component, paired with the total size of the
/// objects attributed to it (computed by [`ObjectMetaSized::compute_sizes`]).
#[derive(Debug, Deserialize, Serialize)]
pub struct ObjectSourceMetaSized {
#[serde(flatten)]
meta: ObjectSourceMeta,
// Total byte size of all objects mapped to this content source.
size: u64,
}
/// Object metadata extended with per-component sizes.
#[derive(Debug)]
pub struct ObjectMetaSized {
// Object checksum -> content id mapping, carried over unchanged.
pub map: ObjectMetaMap,
// Per-component source metadata with sizes, sorted largest first.
pub sizes: Vec<ObjectSourceMetaSized>,
}
impl ObjectMetaSized {
    /// Given object metadata and a repository, compute the total object size
    /// attributed to each content source, returning the result sorted by
    /// size (largest first).
    ///
    /// # Errors
    ///
    /// Returns an error if an object cannot be queried from the repository,
    /// or if a content id present in the map is missing from the set.
    pub fn compute_sizes(repo: &ostree::Repo, meta: ObjectMeta) -> Result<ObjectMetaSized> {
        let cancellable = gio::Cancellable::NONE;
        let map = meta.map;
        let mut set = meta.set;
        // Accumulate per-content-id totals by querying each object's size.
        let mut totals = HashMap::<&str, u64>::new();
        for (checksum, contentid) in map.iter() {
            let finfo = repo.query_file(checksum, cancellable)?.0;
            *totals.entry(contentid).or_default() += finfo.size() as u64;
        }
        // Join each accumulated total back to its source metadata, consuming
        // entries from the set as we go.
        let mut sizes = totals
            .into_iter()
            .map(|(id, size)| {
                let meta = set
                    .take(id)
                    .ok_or_else(|| anyhow!("Failed to find {} in content set", id))?;
                Ok(ObjectSourceMetaSized { meta, size })
            })
            .collect::<Result<Vec<_>>>()?;
        // Largest components first.
        sizes.sort_by(|a, b| b.size.cmp(&a.size));
        Ok(ObjectMetaSized { map, sizes })
    }
}
/// State for partitioning a commit's content objects into chunks.
#[derive(Debug, Default)]
pub struct Chunking {
// Total serialized size of metadata (dirtree/dirmeta) objects.
pub(crate) metadata_size: u64,
// Objects not yet assigned to any named chunk.
pub(crate) remainder: Chunk,
// Chunks produced by `process_mapping`.
pub(crate) chunks: Vec<Chunk>,
// Maximum number of chunks to generate.
pub(crate) max: u32,
// Guard: `process_mapping` may only run once.
processed_mapping: bool,
// Number of components provided in the mapping.
pub(crate) n_provided_components: u32,
// Number of provided components with a nonzero size.
pub(crate) n_sized_components: u32,
}
/// Transient state used while walking the commit's directory tree.
#[derive(Default)]
struct Generation {
// Current path during traversal; pushed/popped as directories are entered/left.
path: Utf8PathBuf,
// Running total of metadata (dirtree/dirmeta) object sizes.
metadata_size: u64,
// Dirtree checksums already accounted for.
dirtree_found: BTreeSet<RcStr>,
// Dirmeta checksums already accounted for.
dirmeta_found: BTreeSet<RcStr>,
}
/// Account for a dirmeta object, adding its serialized size to the running
/// metadata total exactly once per unique checksum.
fn push_dirmeta(repo: &ostree::Repo, gen: &mut Generation, checksum: &str) -> Result<()> {
    // Deduplicate against the dirmeta checksums we have already seen.
    // (Previously this consulted `dirtree_found`, which a dirmeta checksum
    // essentially never appears in, so repeated dirmeta objects were
    // counted into `metadata_size` on every visit.)
    if gen.dirmeta_found.contains(checksum) {
        return Ok(());
    }
    let checksum = RcStr::from(checksum);
    gen.dirmeta_found.insert(RcStr::clone(&checksum));
    let child_v = repo.load_variant(ostree::ObjectType::DirMeta, checksum.borrow())?;
    gen.metadata_size += child_v.data_as_bytes().as_ref().len() as u64;
    Ok(())
}
/// Load a dirtree object, adding its serialized size to the running metadata
/// total the first time its checksum is encountered. Returns the loaded
/// variant so callers can recurse into it without a second repository read.
fn push_dirtree(
    repo: &ostree::Repo,
    gen: &mut Generation,
    checksum: &str,
) -> Result<glib::Variant> {
    let child_v = repo.load_variant(ostree::ObjectType::DirTree, checksum)?;
    // Count (and record) each dirtree exactly once. The previous logic was
    // inverted: it added the size on first sight but inserted into the set
    // only on the already-seen branch — which could never be reached, since
    // nothing else inserts into `dirtree_found` — so shared subtrees were
    // re-counted on every visit.
    if !gen.dirtree_found.contains(checksum) {
        gen.metadata_size += child_v.data_as_bytes().as_ref().len() as u64;
        gen.dirtree_found.insert(RcStr::from(checksum));
    }
    Ok(child_v)
}
/// Recursively walk a dirtree variant, recording every regular-file object
/// (its size plus each path referencing it) into `chunk`, and accumulating
/// metadata sizes for subdirectory dirtree/dirmeta objects into `gen`.
fn generate_chunking_recurse(
    repo: &ostree::Repo,
    gen: &mut Generation,
    chunk: &mut Chunk,
    dt: &glib::Variant,
) -> Result<()> {
    let dt = dt.data_as_bytes();
    let dt = dt.try_as_aligned()?;
    let dt = gv_dirtree!().cast(dt);
    let (files, dirs) = dt.to_tuple();
    // Reusable buffer for hex-encoding checksums: 32 raw bytes -> 64 hex chars.
    let mut hexbuf = [0u8; 64];
    for file in files {
        let (name, csum) = file.to_tuple();
        let fpath = gen.path.join(name.to_str());
        hex::encode_to_slice(csum, &mut hexbuf)?;
        let checksum = std::str::from_utf8(&hexbuf)?;
        let meta = repo.query_file(checksum, gio::Cancellable::NONE)?.0;
        let size = meta.size() as u64;
        // Record the object; a checksum may appear at multiple paths, but its
        // size is added to the chunk total only on first sight.
        let entry = chunk.content.entry(RcStr::from(checksum)).or_default();
        entry.0 = size;
        let first = entry.1.is_empty();
        if first {
            chunk.size += size;
        }
        entry.1.push(fpath);
    }
    for item in dirs {
        let (name, contents_csum, meta_csum) = item.to_tuple();
        let name = name.to_str();
        // Descend into the subdirectory; the matching pop is below.
        gen.path.push(name);
        hex::encode_to_slice(contents_csum, &mut hexbuf)?;
        let checksum_s = std::str::from_utf8(&hexbuf)?;
        let dirtree_v = push_dirtree(repo, gen, checksum_s)?;
        generate_chunking_recurse(repo, gen, chunk, &dirtree_v)?;
        drop(dirtree_v);
        hex::encode_to_slice(meta_csum, &mut hexbuf)?;
        let checksum_s = std::str::from_utf8(&hexbuf)?;
        push_dirmeta(repo, gen, checksum_s)?;
        assert!(gen.path.pop());
    }
    Ok(())
}
impl Chunk {
fn new(name: &str) -> Self {
Chunk {
name: name.to_string(),
..Default::default()
}
}
fn move_obj(&mut self, dest: &mut Self, checksum: &str) -> bool {
if let Some((name, (size, paths))) = self.content.remove_entry(checksum) {
let v = dest.content.insert(name, (size, paths));
debug_assert!(v.is_none());
self.size -= size;
dest.size += size;
true
} else {
false
}
}
}
impl Chunking {
    /// Generate an initial chunking for the target commit: walk the whole
    /// tree, placing every content object into `remainder` and accumulating
    /// the total metadata (dirtree/dirmeta) size.
    pub fn new(repo: &ostree::Repo, rev: &str) -> Result<Self> {
        // Find the target commit.
        let rev = repo.require_rev(rev)?;
        let (commit_v, _) = repo.load_commit(&rev)?;
        let commit_v = commit_v.data_as_bytes();
        let commit_v = commit_v.try_as_aligned()?;
        let commit = gv_commit!().cast(commit_v);
        let commit = commit.to_tuple();

        let mut gen = Generation {
            path: Utf8PathBuf::from("/"),
            ..Default::default()
        };
        let mut chunk: Chunk = Default::default();

        // Account for the root dirtree/dirmeta. Reuse the variant returned by
        // push_dirtree for the recursion — previously the root dirtree was
        // loaded from the repository a second time via load_variant.
        let contents_checksum = &hex::encode(commit.6);
        let contents_v = push_dirtree(repo, &mut gen, contents_checksum)?;
        let meta_checksum = &hex::encode(commit.7);
        push_dirmeta(repo, &mut gen, meta_checksum.as_str())?;

        generate_chunking_recurse(repo, &mut gen, &mut chunk, &contents_v)?;

        let chunking = Chunking {
            metadata_size: gen.metadata_size,
            remainder: chunk,
            ..Default::default()
        };
        Ok(chunking)
    }

    /// Convenience constructor: generate the initial chunking for `rev`, then
    /// immediately apply the provided component mapping.
    pub fn from_mapping(
        repo: &ostree::Repo,
        rev: &str,
        meta: ObjectMetaSized,
        max_layers: Option<NonZeroU32>,
    ) -> Result<Self> {
        let mut r = Self::new(repo, rev)?;
        r.process_mapping(meta, max_layers)?;
        Ok(r)
    }

    /// Number of chunk slots still available.
    fn remaining(&self) -> u32 {
        self.max.saturating_sub(self.chunks.len() as u32)
    }

    /// Partition the remainder into named chunks driven by the component
    /// mapping, using at most `max_layers` (default [`MAX_CHUNKS`]) bins.
    /// Must be called at most once.
    pub fn process_mapping(
        &mut self,
        meta: ObjectMetaSized,
        max_layers: Option<NonZeroU32>,
    ) -> Result<()> {
        // map_or with a constant default replaces the previous
        // unwrap_or(NonZeroU32::new(..).unwrap()) and its clippy allow.
        self.max = max_layers.map_or(MAX_CHUNKS, |v| v.get());

        let sizes = &meta.sizes;
        // This method must only be called once.
        assert!(!self.processed_mapping);
        self.processed_mapping = true;
        let remaining = self.remaining();
        if remaining == 0 {
            return Ok(());
        }

        // Reverse the checksum -> content id map into content id -> checksums.
        let mut rmap = HashMap::<ContentID, Vec<&String>>::new();
        for (checksum, contentid) in meta.map.iter() {
            rmap.entry(Rc::clone(contentid)).or_default().push(checksum);
        }

        self.n_provided_components = meta.sizes.len().try_into().unwrap();
        self.n_sized_components = sizes
            .iter()
            .filter(|v| v.size > 0)
            .count()
            .try_into()
            .unwrap();

        let packing = basic_packing(sizes, NonZeroU32::new(self.max).unwrap());

        for bin in packing.into_iter() {
            let first = bin[0];
            let first_name = &*first.meta.name;
            // Derive a human-readable chunk name from its component names.
            let name = match bin.len() {
                0 => unreachable!(),
                1 => Cow::Borrowed(first_name),
                2..=5 => {
                    // skip(1) avoids repeating the first component's name,
                    // which the fold seed already contains; previously the
                    // result came out as e.g. "A and A and B".
                    let r = bin.iter().map(|v| &*v.meta.name).skip(1).fold(
                        String::from(first_name),
                        |mut acc, v| {
                            write!(acc, " and {}", v).unwrap();
                            acc
                        },
                    );
                    Cow::Owned(r)
                }
                n => Cow::Owned(format!("{n} components")),
            };
            // Move every object belonging to this bin's components out of the
            // remainder and into the new chunk.
            let mut chunk = Chunk::new(&*name);
            for szmeta in bin {
                for &obj in rmap.get(&szmeta.meta.identifier).unwrap() {
                    self.remainder.move_obj(&mut chunk, obj.as_str());
                }
            }
            // Retain only chunks that actually received objects.
            if !chunk.content.is_empty() {
                self.chunks.push(chunk);
            }
        }

        // Every object should have been assigned to some chunk.
        assert_eq!(self.remainder.content.len(), 0);

        Ok(())
    }

    /// Take ownership of the generated chunks, leaving an empty vector behind.
    pub(crate) fn take_chunks(&mut self) -> Vec<Chunk> {
        // mem::take is the idiomatic form of the previous swap-with-new.
        std::mem::take(&mut self.chunks)
    }

    /// Print a human-readable summary of the chunking to stdout.
    pub fn print(&self) {
        println!("Metadata: {}", glib::format_size(self.metadata_size));
        if self.n_provided_components > 0 {
            println!(
                "Components: provided={} sized={}",
                self.n_provided_components, self.n_sized_components
            );
        }
        for (n, chunk) in self.chunks.iter().enumerate() {
            let sz = glib::format_size(chunk.size);
            println!(
                "Chunk {}: \"{}\": objects:{} size:{}",
                n,
                chunk.name,
                chunk.content.len(),
                sz
            );
        }
        if !self.remainder.content.is_empty() {
            let sz = glib::format_size(self.remainder.size);
            println!(
                "Remainder: \"{}\": objects:{} size:{}",
                self.remainder.name,
                self.remainder.content.len(),
                sz
            );
        }
    }
}
/// A set of components destined for a single chunk (bin).
type ChunkedComponents<'a> = Vec<&'a ObjectSourceMetaSized>;
/// Total byte size of the given components.
fn components_size(components: &[&ObjectSourceMetaSized]) -> u64 {
    components.iter().fold(0u64, |acc, c| acc + c.size)
}
/// Test-only helper: total byte size across every bin of a packing.
#[cfg(test)]
fn packing_size(packing: &[ChunkedComponents]) -> u64 {
    packing.iter().fold(0u64, |acc, bin| acc + components_size(bin))
}
/// Order bins from largest to smallest total size (stable sort, so ties keep
/// their relative order).
fn sort_packing(packing: &mut [ChunkedComponents]) {
    packing.sort_by_key(|bin| std::cmp::Reverse(components_size(bin)));
}
/// Pack components into at most `bins` groups.
///
/// If everything fits, each component gets its own bin. Otherwise the first
/// `bins` components each get a dedicated bin, the overflow is grouped by
/// source id, and — if that still exceeds the limit — all bins from the
/// smallest dedicated slot onward are merged into one final catch-all bin.
fn basic_packing(components: &[ObjectSourceMetaSized], bins: NonZeroU32) -> Vec<ChunkedComponents> {
    let capacity = bins.get() as usize;
    // Trivial case: one bin per component.
    if components.len() <= capacity {
        return components.iter().map(|c| vec![c]).collect();
    }
    let mut dedicated: Vec<_> = components.iter().collect();
    let overflow = dedicated.split_off(capacity);
    // Group the overflow components by their source id.
    let mut by_src = HashMap::<_, Vec<&ObjectSourceMetaSized>>::new();
    for component in overflow {
        by_src
            .entry(&component.meta.srcid)
            .or_default()
            .push(component);
    }
    let mut packed: Vec<ChunkedComponents> = dedicated.into_iter().map(|c| vec![c]).collect();
    packed.extend(by_src.into_values());
    sort_packing(&mut packed);
    if packed.len() <= capacity {
        return packed;
    }
    // Still too many bins: fold everything from the last slot onward into one.
    let merge_at = (bins.get().checked_sub(1).unwrap()) as usize;
    let mut tail: Vec<&ObjectSourceMetaSized> = Vec::new();
    for bin in packed.drain(merge_at..) {
        tail.extend(bin);
    }
    if !tail.is_empty() {
        packed.push(tail);
    }
    assert!(packed.len() <= capacity);
    packed
}
#[cfg(test)]
mod test {
    use super::*;

    // Gzipped JSON fixture of real Fedora CoreOS content metadata.
    const FCOS_CONTENTMETA: &[u8] = include_bytes!("fixtures/fedora-coreos-contentmeta.json.gz");

    /// Packing an empty component list yields zero bins regardless of the
    /// bin count.
    #[test]
    fn test_packing_basics() -> Result<()> {
        for n in [1u32, 7] {
            let bins = NonZeroU32::new(n).unwrap();
            assert_eq!(basic_packing(&[], bins).len(), 0);
        }
        Ok(())
    }

    /// Packing real FCOS content metadata fills all bins without losing
    /// any bytes.
    #[test]
    fn test_packing_fcos() -> Result<()> {
        let contentmeta: Vec<ObjectSourceMetaSized> =
            serde_json::from_reader(flate2::read::GzDecoder::new(FCOS_CONTENTMETA))?;
        assert!(!contentmeta.is_empty());
        let total_size: u64 = contentmeta.iter().map(|v| v.size).sum();
        let packing = basic_packing(&contentmeta, NonZeroU32::new(MAX_CHUNKS).unwrap());
        assert_eq!(packing.len() as u32, MAX_CHUNKS);
        assert_eq!(packing_size(&packing), total_size);
        Ok(())
    }
}