use std::collections::HashMap;
/// One row of extracted metadata, stored as named fields.
///
/// Each entry maps a field name to an [`ExtensionValue`]. This type does not
/// fix the set of field names.
///
/// `Default` yields a row with no fields. `PartialEq` compares rows
/// field-by-field, so extraction results can be asserted on directly.
#[derive(Debug, Clone, Default, PartialEq)]
pub struct ExtensionRow {
    /// Field name to value mapping for this row.
    pub fields: HashMap<String, ExtensionValue>,
}
/// A single metadata value stored in an extension row.
///
/// The variants cover the value shapes this file supports: text, optional
/// text, a 32-bit unsigned integer, a list of strings, and raw bytes.
#[derive(Debug, Clone, PartialEq)]
pub enum ExtensionValue {
    /// An owned string.
    Str(String),
    /// A string that may be absent.
    OptStr(Option<String>),
    /// An unsigned 32-bit integer.
    U32(u32),
    /// An ordered list of owned strings.
    StrList(Vec<String>),
    /// An owned byte buffer.
    Bytes(Vec<u8>),
}
/// A borrowed view of one file handed to a plugin's batch extraction.
///
/// The item owns nothing: both fields borrow for `'a`. Because it is only two
/// shared references, it is `Copy` and can be passed around by value.
#[derive(Debug, Clone, Copy)]
pub struct BatchItem<'a> {
    /// Path of the file, borrowed from the caller.
    pub path: &'a str,
    /// Contents of the file, borrowed from the caller.
    pub data: &'a [u8],
}
/// A plugin that extracts an [`ExtensionRow`] of metadata from file contents.
///
/// Implementors must be `Send + Sync`. Only [`name`](Self::name),
/// [`type_id`](Self::type_id) and [`extract_metadata`](Self::extract_metadata)
/// are required; the batch-related methods have defaults.
pub trait ArchiveTypePlugin: Send + Sync {
    /// Returns the plugin's name.
    fn name(&self) -> &str;

    /// Returns the plugin's numeric type identifier.
    ///
    /// NOTE(review): the meaning of the identifier is defined outside this
    /// trait — confirm against the callers.
    fn type_id(&self) -> i8;

    /// Extracts metadata for a single file, or `None` when the plugin
    /// produces no row for it.
    fn extract_metadata(&self, path: &str, data: &[u8]) -> Option<ExtensionRow>;

    /// Reports whether the plugin wants files handed over through
    /// [`extract_batch`](Self::extract_batch). Defaults to `false`.
    fn supports_batch(&self) -> bool {
        false
    }

    /// Returns the plugin's batch threshold. Defaults to `200 * 1024 * 1024`.
    ///
    /// NOTE(review): presumably a byte count (200 MiB) at which a batch should
    /// be processed — confirm against the caller that reads it.
    fn batch_threshold(&self) -> usize {
        const MIB: usize = 1024 * 1024;
        200 * MIB
    }

    /// Extracts metadata for several files in one call.
    ///
    /// The default implementation calls
    /// [`extract_metadata`](Self::extract_metadata) once per item, in order,
    /// and returns exactly one entry per item.
    fn extract_batch(&self, items: &[BatchItem<'_>]) -> Vec<Option<ExtensionRow>> {
        let mut rows = Vec::with_capacity(items.len());
        for item in items {
            rows.push(self.extract_metadata(item.path, item.data));
        }
        rows
    }
}
/// A collection of staged files together with the combined size of their
/// contents.
///
/// Both fields are private so the byte total can only change alongside the
/// file list.
pub struct IngestBatch {
    /// Staged files, in the order they were added.
    files: Vec<StagedFile>,
    /// Sum of the `data` lengths of the staged files.
    total_bytes: usize,
}
/// A file held in an ingest batch: its path, its contents, and the metadata
/// extracted for it, if any.
#[derive(Debug, Clone)]
pub struct StagedFile {
    /// Path of the file.
    pub path: String,
    /// Raw contents of the file.
    pub data: Vec<u8>,
    /// Metadata extracted for this file; `None` until an extraction has
    /// produced a row for it.
    pub metadata: Option<ExtensionRow>,
}
impl IngestBatch {
    /// Creates an empty batch with no staged files and a zero byte total.
    pub fn new() -> Self {
        Self { files: Vec::new(), total_bytes: 0 }
    }

    /// Creates an empty batch with storage pre-allocated for `cap` files.
    ///
    /// `cap` is a number of files, not a number of bytes.
    pub fn with_capacity(cap: usize) -> Self {
        Self { files: Vec::with_capacity(cap), total_bytes: 0 }
    }

    /// Stages a file and adds the length of `data` to the running byte total.
    ///
    /// The staged file starts without metadata.
    pub fn push(&mut self, path: String, data: Vec<u8>) {
        self.total_bytes += data.len();
        self.files.push(StagedFile { path, data, metadata: None });
    }

    /// Returns the combined length of the contents of all staged files.
    pub fn total_bytes(&self) -> usize {
        self.total_bytes
    }

    /// Returns the number of staged files.
    pub fn len(&self) -> usize {
        self.files.len()
    }

    /// Returns `true` when no files are staged.
    pub fn is_empty(&self) -> bool {
        self.files.is_empty()
    }

    /// Runs `plugin` over every staged file and stores the outcome in each
    /// file's `metadata`, replacing whatever was there before.
    ///
    /// When the plugin reports `supports_batch()`, all files are handed to
    /// `extract_batch` in a single call and results are matched to files by
    /// position. Otherwise `extract_metadata` is called once per file.
    ///
    /// If a batch call returns fewer results than there are files, the files
    /// without a result get `None` instead of keeping metadata from an earlier
    /// pass. Surplus results are ignored.
    ///
    /// Does nothing, and does not call the plugin, when the batch is empty.
    pub fn extract_metadata(&mut self, plugin: &dyn ArchiveTypePlugin) {
        if self.files.is_empty() {
            return;
        }

        if !plugin.supports_batch() {
            for file in &mut self.files {
                file.metadata = plugin.extract_metadata(&file.path, &file.data);
            }
            return;
        }

        // Scope the borrowed views so the files can be mutated afterwards.
        let results = {
            let items: Vec<BatchItem<'_>> = self
                .files
                .iter()
                .map(|f| BatchItem { path: &f.path, data: &f.data })
                .collect();
            plugin.extract_batch(&items)
        };

        // Walk the files, not a `zip`: zipping stops at the shorter side and
        // would leave stale metadata on trailing files when the plugin
        // returns too few results.
        let mut results = results.into_iter();
        for file in &mut self.files {
            file.metadata = results.next().flatten();
        }
    }

    /// Resets the byte total to zero and returns an iterator that removes and
    /// yields every staged file.
    ///
    /// The total is reset as soon as this is called. Per `Vec::drain`, files
    /// not consumed are still removed when the iterator is dropped.
    pub fn drain(&mut self) -> impl Iterator<Item = StagedFile> + '_ {
        self.total_bytes = 0;
        self.files.drain(..)
    }

    /// Returns an iterator over the staged files, in insertion order, without
    /// removing them.
    pub fn iter(&self) -> impl Iterator<Item = &StagedFile> {
        self.files.iter()
    }
}
impl Default for IngestBatch {
fn default() -> Self {
Self::new()
}
}
/// Holds at most one [`ArchiveTypePlugin`].
pub struct PluginRegistry {
    /// The registered plugin, or `None` when no plugin is registered.
    plugin: Option<Box<dyn ArchiveTypePlugin>>,
}
impl PluginRegistry {
pub fn new() -> Self {
Self { plugin: None }
}
pub fn with_plugin(plugin: Box<dyn ArchiveTypePlugin>) -> Self {
Self { plugin: Some(plugin) }
}
pub fn batch_threshold(&self) -> usize {
self.plugin.as_ref().map(|p| p.batch_threshold()).unwrap_or(200 * 1024 * 1024)
}
pub fn extract_batch(&self, batch: &mut IngestBatch) {
if let Some(plugin) = &self.plugin {
batch.extract_metadata(plugin.as_ref());
}
}
pub fn extract(&self, path: &str, data: &[u8]) -> Option<ExtensionRow> {
self.plugin.as_ref()?.extract_metadata(path, data)
}
pub fn type_id(&self) -> Option<i8> {
self.plugin.as_ref().map(|p| p.type_id())
}
pub fn has_plugin(&self) -> bool {
self.plugin.is_some()
}
}
impl Default for PluginRegistry {
fn default() -> Self {
Self::new()
}
}