znippy-plugin-media 0.9.0

Media (image/audio/video) metadata plugin for znippy — pure-Rust, airgap-friendly
//! The `ArchiveTypePlugin` implementation registered in the znippy CLI.
//!
//! One Arrow schema spans image/audio/video; a per-file [`MediaMeta`] fills only
//! the columns relevant to its kind and the rest serialize to null cells. The
//! writer maps `ExtensionValue::U32` onto `UInt32` columns and everything else
//! onto `Utf8`, so numeric facets (width / duration / sample rate) stay typed and
//! queryable.

use std::collections::HashMap;

use znippy_common::arrow::datatypes::{DataType, Field};
use znippy_common::plugin::{
    ArchiveTypePlugin, ExtensionRow, ExtensionValue, HandlerCommand, HandlerMeta,
};

use crate::{MEDIA_EXTENSIONS, MEDIA_TYPE_ID, MediaMeta, extract_media_metadata, is_media_path};

/// Native media handler: images (dims/color/EXIF), audio (duration/rate/tags),
/// video (dims/codecs/framerate). Stores the blob verbatim, extracts metadata.
///
/// The type is a field-less unit struct, so it carries no state of its own.
/// `Debug`, `Clone` and `Copy` are derived so the handler can be logged and
/// passed around by value without ceremony.
#[derive(Debug, Clone, Copy)]
pub struct NativeMediaPlugin;

impl NativeMediaPlugin {
    pub fn new() -> Self {
        NativeMediaPlugin
    }
}

impl Default for NativeMediaPlugin {
    fn default() -> Self {
        Self::new()
    }
}

impl ArchiveTypePlugin for NativeMediaPlugin {
    fn name(&self) -> &str {
        "media"
    }

    fn type_id(&self) -> i8 {
        MEDIA_TYPE_ID
    }

    fn meta(&self) -> HandlerMeta {
        HandlerMeta {
            name: "media".into(),
            aliases: vec!["image".into(), "audio".into(), "video".into()],
            type_id: MEDIA_TYPE_ID,
            ecosystem: "Media files (images / audio / video)".into(),
            extensions: MEDIA_EXTENSIONS.iter().map(|s| s.to_string()).collect(),
            description: "Image / audio / video metadata (dimensions, EXIF, duration, codecs, \
                          tags) — pure-Rust, blob stored verbatim"
                .into(),
            commands: vec![HandlerCommand::new(
                "probe",
                "Print parsed media metadata for a file (kind/format/dims/duration/codecs)",
            )],
        }
    }

    fn run_command(&self, cmd: &str, args: &[String]) -> anyhow::Result<()> {
        match cmd {
            "probe" => {
                let path =
                    args.first().ok_or_else(|| anyhow::anyhow!("usage: media probe <file>"))?;
                let data = std::fs::read(path)?;
                match extract_media_metadata(path, &data) {
                    Some(m) => {
                        println!("{:#?}", m);
                        Ok(())
                    }
                    None => anyhow::bail!("media: could not parse '{}'", path),
                }
            }
            other => anyhow::bail!("media: unknown subcommand '{}'", other),
        }
    }

    fn matches_path(&self, path: &str) -> bool {
        is_media_path(path)
    }

    fn schema_fields(&self) -> Vec<Field> {
        vec![
            Field::new("media_kind", DataType::Utf8, true),
            Field::new("format", DataType::Utf8, true),
            Field::new("width", DataType::UInt32, true),
            Field::new("height", DataType::UInt32, true),
            Field::new("color", DataType::Utf8, true),
            Field::new("duration_ms", DataType::UInt32, true),
            Field::new("sample_rate", DataType::UInt32, true),
            Field::new("channels", DataType::UInt32, true),
            Field::new("bitrate", DataType::UInt32, true),
            Field::new("audio_codec", DataType::Utf8, true),
            Field::new("video_codec", DataType::Utf8, true),
            Field::new("framerate", DataType::Utf8, true),
            Field::new("title", DataType::Utf8, true),
            Field::new("artist", DataType::Utf8, true),
            Field::new("album", DataType::Utf8, true),
            Field::new("camera", DataType::Utf8, true),
            Field::new("datetime", DataType::Utf8, true),
            Field::new("orientation", DataType::UInt32, true),
            Field::new("gps", DataType::Utf8, true),
        ]
    }

    fn extract_metadata(&self, path: &str, data: &[u8]) -> Option<ExtensionRow> {
        let m = extract_media_metadata(path, data)?;
        Some(media_meta_to_row(&m))
    }
}

/// Convert a [`MediaMeta`] into the plugin's [`ExtensionRow`].
///
/// A key is written only when the corresponding `MediaMeta` field is `Some`.
/// Keys left out of the map end up as null cells, because the writer appends
/// null when a key is missing.
fn media_meta_to_row(m: &MediaMeta) -> ExtensionRow {
    // Text columns, keyed by their Arrow column name.
    let text_columns: [(&str, Option<&String>); 11] = [
        ("format", m.format.as_ref()),
        ("color", m.color.as_ref()),
        ("audio_codec", m.audio_codec.as_ref()),
        ("video_codec", m.video_codec.as_ref()),
        ("framerate", m.framerate.as_ref()),
        ("title", m.title.as_ref()),
        ("artist", m.artist.as_ref()),
        ("album", m.album.as_ref()),
        ("camera", m.camera.as_ref()),
        ("datetime", m.datetime.as_ref()),
        ("gps", m.gps.as_ref()),
    ];

    // Numeric (UInt32) columns.
    let numeric_columns: [(&str, Option<u32>); 7] = [
        ("width", m.width),
        ("height", m.height),
        ("duration_ms", m.duration_ms),
        ("sample_rate", m.sample_rate),
        ("channels", m.channels),
        ("bitrate", m.bitrate),
        ("orientation", m.orientation),
    ];

    let mut fields: HashMap<String, ExtensionValue> = HashMap::new();

    // The kind is stored as its string form rather than as an enum value.
    if let Some(kind) = m.kind {
        fields.insert("media_kind".to_string(), ExtensionValue::Str(kind.as_str().to_string()));
    }

    for (column, value) in text_columns {
        if let Some(text) = value {
            fields.insert(column.to_string(), ExtensionValue::Str(text.clone()));
        }
    }

    for (column, value) in numeric_columns {
        if let Some(number) = value {
            fields.insert(column.to_string(), ExtensionValue::U32(number));
        }
    }

    ExtensionRow { fields }
}