1use chrono::{DateTime, Utc};
20use serde::{Deserialize, Serialize};
21use std::collections::HashMap;
22use std::time::SystemTime;
23
24pub fn utc_from_system_time(st: SystemTime) -> Option<DateTime<Utc>> {
28 let duration = st.duration_since(std::time::UNIX_EPOCH).ok()?;
29 DateTime::from_timestamp(duration.as_secs() as i64, duration.subsec_nanos())
30}
31
32#[derive(Debug, Clone, Default, Serialize, Deserialize)]
34pub struct DocxCore {
35 #[serde(skip_serializing_if = "Option::is_none")]
36 pub creator: Option<String>,
37 #[serde(skip_serializing_if = "Option::is_none")]
38 pub last_modified_by: Option<String>,
39 #[serde(skip_serializing_if = "Option::is_none")]
40 pub revision: Option<String>,
41}
42
43#[derive(Debug, Clone, Default, Serialize, Deserialize)]
45pub struct PdfInfo {
46 #[serde(skip_serializing_if = "Option::is_none")]
47 pub page_count: Option<u32>,
48 #[serde(skip_serializing_if = "Option::is_none")]
49 pub author: Option<String>,
50 #[serde(skip_serializing_if = "Option::is_none")]
51 pub title: Option<String>,
52}
53
54#[derive(Debug, Clone, Default, Serialize, Deserialize)]
56pub struct ExifInfo {
57 #[serde(skip_serializing_if = "Option::is_none")]
58 pub camera: Option<String>,
59 #[serde(skip_serializing_if = "Option::is_none")]
60 pub lens: Option<String>,
61 #[serde(skip_serializing_if = "Option::is_none")]
62 pub gps_lat: Option<f64>,
63 #[serde(skip_serializing_if = "Option::is_none")]
64 pub gps_lon: Option<f64>,
65 #[serde(flatten)]
66 pub extra: HashMap<String, serde_json::Value>,
67}
68
69#[derive(Debug, Clone, Default, Serialize, Deserialize)]
71pub struct AudioInfo {
72 #[serde(skip_serializing_if = "Option::is_none")]
73 pub artist: Option<String>,
74 #[serde(skip_serializing_if = "Option::is_none")]
75 pub title: Option<String>,
76 #[serde(skip_serializing_if = "Option::is_none")]
77 pub album: Option<String>,
78}
79
80#[derive(Debug, Clone, Default, Serialize, Deserialize)]
82pub struct VideoInfo {
83 #[serde(skip_serializing_if = "Option::is_none")]
84 pub codec_name: Option<String>,
85 #[serde(skip_serializing_if = "Option::is_none")]
86 pub width: Option<u32>,
87 #[serde(skip_serializing_if = "Option::is_none")]
88 pub height: Option<u32>,
89 #[serde(skip_serializing_if = "Option::is_none")]
90 pub duration_secs: Option<f64>,
91}
92
93#[derive(Debug, Clone, Default, Serialize, Deserialize)]
94pub struct IdentityInfo {
95 pub path: String,
96 pub size: u64,
97 #[serde(skip_serializing_if = "Option::is_none")]
98 pub modified_at: Option<DateTime<Utc>>,
99 #[serde(skip_serializing_if = "Option::is_none")]
100 pub accessed_at: Option<DateTime<Utc>>,
101 #[serde(skip_serializing_if = "Option::is_none")]
102 pub created_at: Option<DateTime<Utc>>,
103 #[serde(skip_serializing_if = "Option::is_none")]
104 pub inode: Option<u64>,
105 #[serde(skip_serializing_if = "Option::is_none")]
106 pub device_id: Option<u64>,
107}
108
109#[derive(Debug, Clone, Default, Serialize, Deserialize)]
110pub struct HashesInfo {
111 #[serde(skip_serializing_if = "Option::is_none")]
112 pub xxhash64: Option<String>,
113 #[serde(skip_serializing_if = "Option::is_none")]
114 pub md5: Option<String>,
115 #[serde(skip_serializing_if = "Option::is_none")]
116 pub sha256: Option<String>,
117 #[serde(skip_serializing_if = "Option::is_none")]
118 pub mode: Option<String>,
119}
120
121#[derive(Debug, Clone, Default, Serialize, Deserialize)]
122pub struct FormatInfo {
123 #[serde(skip_serializing_if = "Option::is_none")]
124 pub kind: Option<String>,
125 #[serde(skip_serializing_if = "Option::is_none")]
126 pub mime: Option<String>,
127 #[serde(skip_serializing_if = "Option::is_none")]
128 pub extension_match: Option<bool>,
129 #[serde(skip_serializing_if = "Option::is_none")]
130 pub confidence: Option<f64>,
131}
132
133#[derive(Debug, Clone, Default, Serialize, Deserialize)]
134pub struct MetaInfo {
135 #[serde(skip_serializing_if = "Option::is_none")]
136 pub phash: Option<u64>,
137 #[serde(skip_serializing_if = "Option::is_none")]
138 pub exif: Option<ExifInfo>,
139 #[serde(skip_serializing_if = "Option::is_none")]
140 pub audio: Option<AudioInfo>,
141 #[serde(skip_serializing_if = "Option::is_none")]
142 pub video: Option<VideoInfo>,
143 #[serde(skip_serializing_if = "Option::is_none")]
144 pub pdf: Option<PdfInfo>,
145 #[serde(skip_serializing_if = "Option::is_none")]
146 pub docx: Option<DocxCore>,
147}
148
149#[derive(Debug, Clone, Default, Serialize, Deserialize)]
150pub struct FileRecord {
151 pub schema_version: u32,
152 pub identity: IdentityInfo,
153 #[serde(skip_serializing_if = "Option::is_none")]
154 pub hashes: Option<HashesInfo>,
155 #[serde(skip_serializing_if = "Option::is_none")]
156 pub format: Option<FormatInfo>,
157 #[serde(skip_serializing_if = "Option::is_none")]
158 pub meta: Option<MetaInfo>,
159 #[serde(skip_serializing_if = "Option::is_none")]
160 pub error: Option<String>,
161}
162
#[cfg(test)]
mod tests {
    use super::*;
    use chrono::TimeZone;
    use std::collections::HashMap;

    /// A UTC `modified_at` must serialize as an RFC 3339 string ending in `Z`
    /// and survive a round trip unchanged.
    #[test]
    fn serialized_modified_at_is_rfc3339_utc_z() {
        let fixed = Utc.with_ymd_and_hms(2026, 4, 6, 12, 34, 56).unwrap();
        let record = FileRecord {
            schema_version: 1,
            identity: IdentityInfo {
                path: "/tmp/x".into(),
                size: 0,
                modified_at: Some(fixed),
                ..Default::default()
            },
            ..Default::default()
        };
        let json = serde_json::to_string(&record).unwrap();
        assert!(json.contains("\"schema_version\":1"));
        assert!(json.contains("\"identity\""));
        // Pin the key, the timestamp and the `Z` suffix together. Checking for
        // a bare 'Z' anywhere in the payload would also pass if the suffix
        // came from some unrelated field.
        assert!(
            json.contains("\"modified_at\":\"2026-04-06T12:34:56Z\""),
            "unexpected modified_at encoding in {json}"
        );
        let parsed: FileRecord = serde_json::from_str(&json).unwrap();
        assert_eq!(parsed.identity.modified_at, Some(fixed));
    }

    /// An EXIF key with no dedicated field must land in `extra` and still be
    /// there after serialize + deserialize.
    #[test]
    fn exif_unknown_key_roundtrip_survives() {
        let mut extra = HashMap::new();
        extra.insert(
            "ImageDescription".to_string(),
            serde_json::json!("sample description"),
        );
        let record = FileRecord {
            schema_version: 1,
            identity: IdentityInfo {
                path: "/tmp/x.jpg".into(),
                size: 1,
                ..Default::default()
            },
            meta: Some(MetaInfo {
                exif: Some(ExifInfo {
                    camera: Some("A".into()),
                    lens: None,
                    gps_lat: None,
                    gps_lon: None,
                    extra,
                }),
                ..Default::default()
            }),
            ..Default::default()
        };
        let json = serde_json::to_string(&record).unwrap();
        let parsed: FileRecord = serde_json::from_str(&json).unwrap();
        let exif = parsed.meta.unwrap().exif.unwrap();
        assert_eq!(
            exif.extra.get("ImageDescription"),
            Some(&serde_json::json!("sample description"))
        );
    }

    /// A line carrying only the required fields must deserialize.
    #[test]
    fn json_deserializes_minimal_ndjson_line() {
        let json = r#"{"schema_version":1,"identity":{"path":"a","size":2}}"#;
        let rec: FileRecord = serde_json::from_str(json).unwrap();
        assert_eq!(rec.identity.path, "a");
        assert_eq!(rec.identity.size, 2);
    }

    /// A 75 KiB `error` string must deserialize intact.
    #[test]
    fn json_deserializes_large_error_field() {
        let big = "a".repeat(75 * 1024);
        let json = format!(
            r#"{{"schema_version":1,"identity":{{"path":"/tmp/x","size":1}},"error":{}}}"#,
            serde_json::to_string(&big).unwrap()
        );
        let rec: FileRecord = serde_json::from_str(&json).unwrap();
        assert_eq!(rec.identity.path, "/tmp/x");
        assert_eq!(rec.identity.size, 1);
        assert_eq!(rec.error.as_deref(), Some(big.as_str()));
    }
}