use chrono::{DateTime, Utc};
use serde::{Deserialize, Serialize};
use std::collections::{BTreeMap, HashMap};
/// Kind of source recorded in a [`SourceProvenance`].
///
/// With `rename_all = "snake_case"`, serde reads and writes each variant under its
/// snake_case name (for example `AppleNote` <-> `"apple_note"`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum SourceKind {
AppleNote,
Screenshot,
Photo,
Video,
Audio,
Pdf,
Markdown,
// NOTE(review): presumably the fallback for sources that match no other variant —
// confirm against the importers that construct it.
Unknown,
}
impl SourceKind {
pub fn as_str(self) -> &'static str {
match self {
Self::AppleNote => "apple_note",
Self::Screenshot => "screenshot",
Self::Photo => "photo",
Self::Video => "video",
Self::Audio => "audio",
Self::Pdf => "pdf",
Self::Markdown => "markdown",
Self::Unknown => "unknown",
}
}
}
/// Describes where an imported item came from and when it was brought in.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SourceProvenance {
/// Kind of source; its label prefixes every idempotency key.
pub source_kind: SourceKind,
/// Location of the source as supplied to `new`; the last-resort input for the
/// idempotency key.
pub source_uri: String,
/// Optional identifier; the first choice when building the idempotency key.
// NOTE(review): presumably the ID assigned by the originating system — confirm.
pub origin_id: Option<String>,
/// Optional fingerprint; the second choice when building the idempotency key.
// NOTE(review): the tests pass values such as "sha256:deadbeef", so this looks like
// a content hash — confirm the expected format.
pub fingerprint: Option<String>,
/// Optional capture timestamp; `metadata_pairs` emits it in RFC 3339 form when set.
pub captured_at: Option<DateTime<Utc>>,
/// Import timestamp; `new` sets it to the current UTC time.
pub imported_at: DateTime<Utc>,
/// Free-form key/value pairs; `metadata_pairs` emits them under `source.meta.<key>`.
pub metadata: HashMap<String, String>,
}
impl SourceProvenance {
    /// Creates a provenance record for `source_uri` of the given `source_kind`.
    ///
    /// `imported_at` is stamped with the current UTC time. `origin_id`, `fingerprint`
    /// and `captured_at` start unset and `metadata` starts empty.
    pub fn new(source_kind: SourceKind, source_uri: impl Into<String>) -> Self {
        Self {
            source_kind,
            source_uri: source_uri.into(),
            origin_id: None,
            fingerprint: None,
            captured_at: None,
            imported_at: Utc::now(),
            metadata: HashMap::new(),
        }
    }

    /// Returns the record with `origin_id` set, replacing any previous value.
    #[must_use = "builder methods return the updated record instead of mutating in place"]
    pub fn with_origin_id(mut self, origin_id: impl Into<String>) -> Self {
        self.origin_id = Some(origin_id.into());
        self
    }

    /// Returns the record with `fingerprint` set, replacing any previous value.
    #[must_use = "builder methods return the updated record instead of mutating in place"]
    pub fn with_fingerprint(mut self, fingerprint: impl Into<String>) -> Self {
        self.fingerprint = Some(fingerprint.into());
        self
    }

    /// Returns the record with `captured_at` set, replacing any previous value.
    #[must_use = "builder methods return the updated record instead of mutating in place"]
    pub fn with_captured_at(mut self, captured_at: DateTime<Utc>) -> Self {
        self.captured_at = Some(captured_at);
        self
    }

    /// Returns the record with `key` mapped to `value` in `metadata`.
    ///
    /// An existing entry under the same key is overwritten.
    #[must_use = "builder methods return the updated record instead of mutating in place"]
    pub fn with_metadata(mut self, key: impl Into<String>, value: impl Into<String>) -> Self {
        self.metadata.insert(key.into(), value.into());
        self
    }

    /// Builds the key used to recognise the same source across repeated imports.
    ///
    /// The key has the shape `<kind>:<scheme>:<value>`, using the first available of:
    ///
    /// 1. `origin_id`   -> `<kind>:origin:<origin_id>`
    /// 2. `fingerprint` -> `<kind>:fingerprint:<fingerprint>`
    /// 3. `source_uri`  -> `<kind>:uri:<source_uri>`
    ///
    /// An `origin_id` or `fingerprint` that is set but empty is treated as absent.
    pub fn idempotency_key(&self) -> String {
        let kind = self.source_kind.as_str();

        // An empty identifier cannot tell two sources apart: every record carrying one
        // would share the key "<kind>:origin:" and collapse into a single entry. Skip
        // it and fall through to the next candidate instead.
        let origin_id = self.origin_id.as_deref().filter(|id| !id.is_empty());
        let fingerprint = self.fingerprint.as_deref().filter(|fp| !fp.is_empty());

        match (origin_id, fingerprint) {
            (Some(id), _) => format!("{kind}:origin:{id}"),
            (None, Some(fp)) => format!("{kind}:fingerprint:{fp}"),
            (None, None) => format!("{kind}:uri:{}", self.source_uri),
        }
    }

    /// Flattens the record into `(key, value)` string pairs sorted by key.
    ///
    /// Built-in fields are emitted under `source.*`; timestamps are rendered as
    /// RFC 3339. Optional fields that are unset are omitted. Entries from `metadata`
    /// are emitted under `source.meta.<key>`.
    pub fn metadata_pairs(&self) -> Vec<(String, String)> {
        // A BTreeMap iterates in key order, which gives the sorted output for free.
        let mut pairs: BTreeMap<String, String> = BTreeMap::new();

        pairs.insert(
            "source.kind".to_owned(),
            self.source_kind.as_str().to_owned(),
        );
        pairs.insert("source.uri".to_owned(), self.source_uri.clone());
        pairs.insert("source.idempotency_key".to_owned(), self.idempotency_key());
        pairs.insert(
            "source.imported_at".to_owned(),
            self.imported_at.to_rfc3339(),
        );

        if let Some(origin_id) = &self.origin_id {
            pairs.insert("source.origin_id".to_owned(), origin_id.clone());
        }
        if let Some(fingerprint) = &self.fingerprint {
            pairs.insert("source.fingerprint".to_owned(), fingerprint.clone());
        }
        if let Some(captured_at) = self.captured_at {
            pairs.insert("source.captured_at".to_owned(), captured_at.to_rfc3339());
        }

        // The `source.meta.` prefix keeps caller-supplied keys from shadowing the
        // built-in ones above.
        pairs.extend(
            self.metadata
                .iter()
                .map(|(key, value)| (format!("source.meta.{key}"), value.clone())),
        );

        pairs.into_iter().collect()
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use chrono::TimeZone;

    /// When both an origin id and a fingerprint are set, the key is built from the
    /// origin id.
    #[test]
    fn idempotency_key_prefers_origin_id() {
        let key = SourceProvenance::new(SourceKind::AppleNote, "notes://abc")
            .with_fingerprint("sha256:deadbeef")
            .with_origin_id("note-123")
            .idempotency_key();

        assert_eq!(key, "apple_note:origin:note-123");
    }

    /// `metadata_pairs` yields keys in sorted order and includes the built-in and the
    /// `source.meta.`-prefixed entries.
    #[test]
    fn metadata_pairs_are_namespaced_and_sorted() {
        let captured_at = Utc.with_ymd_and_hms(2025, 1, 2, 3, 4, 5).unwrap();
        let imported_at = Utc.with_ymd_and_hms(2025, 1, 2, 10, 11, 12).unwrap();

        // Pin `imported_at` so the record does not depend on the wall clock.
        let provenance = SourceProvenance {
            imported_at,
            ..SourceProvenance::new(SourceKind::Screenshot, "/tmp/shot.png")
                .with_fingerprint("abc123")
                .with_captured_at(captured_at)
                .with_metadata("window_title", "Mail")
                .with_metadata("app", "Mail")
        };

        let pairs = provenance.metadata_pairs();
        let keys: Vec<&str> = pairs.iter().map(|(key, _)| key.as_str()).collect();

        let is_sorted = keys.windows(2).all(|pair| pair[0] <= pair[1]);
        assert!(is_sorted);

        for expected in [
            "source.kind",
            "source.meta.window_title",
            "source.idempotency_key",
        ] {
            assert!(keys.contains(&expected));
        }
    }
}