converge_knowledge/ingest/
source.rs1use chrono::{DateTime, Utc};
7use serde::{Deserialize, Serialize};
8use std::collections::{BTreeMap, HashMap};
9
10#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
12#[serde(rename_all = "snake_case")]
13pub enum SourceKind {
14 AppleNote,
16 Screenshot,
18 Photo,
20 Video,
22 Audio,
24 Pdf,
26 Markdown,
28 Unknown,
30}
31
32impl SourceKind {
33 pub fn as_str(self) -> &'static str {
35 match self {
36 Self::AppleNote => "apple_note",
37 Self::Screenshot => "screenshot",
38 Self::Photo => "photo",
39 Self::Video => "video",
40 Self::Audio => "audio",
41 Self::Pdf => "pdf",
42 Self::Markdown => "markdown",
43 Self::Unknown => "unknown",
44 }
45 }
46}
47
48#[derive(Debug, Clone, Serialize, Deserialize)]
52pub struct SourceProvenance {
53 pub source_kind: SourceKind,
55 pub source_uri: String,
57 pub origin_id: Option<String>,
59 pub fingerprint: Option<String>,
61 pub captured_at: Option<DateTime<Utc>>,
63 pub imported_at: DateTime<Utc>,
65 pub metadata: HashMap<String, String>,
67}
68
69impl SourceProvenance {
70 pub fn new(source_kind: SourceKind, source_uri: impl Into<String>) -> Self {
72 Self {
73 source_kind,
74 source_uri: source_uri.into(),
75 origin_id: None,
76 fingerprint: None,
77 captured_at: None,
78 imported_at: Utc::now(),
79 metadata: HashMap::new(),
80 }
81 }
82
83 pub fn with_origin_id(mut self, origin_id: impl Into<String>) -> Self {
85 self.origin_id = Some(origin_id.into());
86 self
87 }
88
89 pub fn with_fingerprint(mut self, fingerprint: impl Into<String>) -> Self {
91 self.fingerprint = Some(fingerprint.into());
92 self
93 }
94
95 pub fn with_captured_at(mut self, captured_at: DateTime<Utc>) -> Self {
97 self.captured_at = Some(captured_at);
98 self
99 }
100
101 pub fn with_metadata(mut self, key: impl Into<String>, value: impl Into<String>) -> Self {
103 self.metadata.insert(key.into(), value.into());
104 self
105 }
106
107 pub fn idempotency_key(&self) -> String {
114 if let Some(origin_id) = &self.origin_id {
115 return format!("{}:origin:{}", self.source_kind.as_str(), origin_id);
116 }
117
118 if let Some(fingerprint) = &self.fingerprint {
119 return format!("{}:fingerprint:{}", self.source_kind.as_str(), fingerprint);
120 }
121
122 format!("{}:uri:{}", self.source_kind.as_str(), self.source_uri)
123 }
124
125 pub fn metadata_pairs(&self) -> Vec<(String, String)> {
129 let mut out = BTreeMap::new();
130 out.insert(
131 "source.kind".to_string(),
132 self.source_kind.as_str().to_string(),
133 );
134 out.insert("source.uri".to_string(), self.source_uri.clone());
135 out.insert("source.idempotency_key".to_string(), self.idempotency_key());
136
137 if let Some(origin_id) = &self.origin_id {
138 out.insert("source.origin_id".to_string(), origin_id.clone());
139 }
140 if let Some(fingerprint) = &self.fingerprint {
141 out.insert("source.fingerprint".to_string(), fingerprint.clone());
142 }
143 if let Some(captured_at) = self.captured_at {
144 out.insert("source.captured_at".to_string(), captured_at.to_rfc3339());
145 }
146 out.insert(
147 "source.imported_at".to_string(),
148 self.imported_at.to_rfc3339(),
149 );
150
151 for (key, value) in &self.metadata {
152 out.insert(format!("source.meta.{key}"), value.clone());
153 }
154
155 out.into_iter().collect()
156 }
157}
158
#[cfg(test)]
mod tests {
    use super::*;
    use chrono::TimeZone;

    /// When both an origin id and a fingerprint are set, the origin id is the
    /// one used for the idempotency key.
    #[test]
    fn idempotency_key_prefers_origin_id() {
        let key = SourceProvenance::new(SourceKind::AppleNote, "notes://abc")
            .with_fingerprint("sha256:deadbeef")
            .with_origin_id("note-123")
            .idempotency_key();

        assert_eq!(key, "apple_note:origin:note-123".to_string());
    }

    /// The flattened pairs come back ordered by key and include the built-in
    /// `source.*` entries as well as the prefixed custom metadata.
    #[test]
    fn metadata_pairs_are_namespaced_and_sorted() {
        let captured_at = Utc.with_ymd_and_hms(2025, 1, 2, 3, 4, 5).unwrap();
        let imported_at = Utc.with_ymd_and_hms(2025, 1, 2, 10, 11, 12).unwrap();

        let mut provenance = SourceProvenance::new(SourceKind::Screenshot, "/tmp/shot.png")
            .with_fingerprint("abc123")
            .with_captured_at(captured_at)
            .with_metadata("window_title", "Mail")
            .with_metadata("app", "Mail");
        // Pin the import time so the fixture does not depend on the clock.
        provenance.imported_at = imported_at;

        let pairs = provenance.metadata_pairs();
        let keys: Vec<&str> = pairs.iter().map(|(key, _)| key.as_str()).collect();

        // Every adjacent pair of keys must be in non-decreasing order.
        assert!(keys.windows(2).all(|pair| pair[0] <= pair[1]));

        for expected in [
            "source.kind",
            "source.meta.window_title",
            "source.idempotency_key",
        ] {
            assert!(keys.contains(&expected));
        }
    }
}