Skip to main content

output/
record.rs

//! Serializable per-file scan result (align with downstream `core/model/file.go` when available).
//!
//! # Filesystem timestamps (`FileRecord::identity`)
//!
//! The optional `IdentityInfo` fields `modified_at`, `accessed_at`, and `created_at` are filled
//! from the same [`std::fs::Metadata`] used for `size`. They are omitted from JSON when unavailable
//! (`#[serde(skip_serializing_if = "Option::is_none")]`). Wire format is RFC3339 in UTC
//! (e.g. `2026-04-06T12:34:56.789Z`).
//!
//! - **`modified_at`** — Last content modification time (POSIX `mtime`).
//! - **`accessed_at`** — Last access time when the platform exposes it; omitted on `Err` from
//!   [`Metadata::accessed`](std::fs::Metadata::accessed). On Linux, `relatime` and similar mount
//!   options may mean atime is not updated on every read.
//! - **`created_at`** — Best-effort creation / birth time from [`Metadata::created`](std::fs::Metadata::created):
//!   Windows file creation time when available; on Unix, birth time when the OS reports it.
//!   Omitted when unsupported. This is **not** inode change time (`ctime`); we do not fake
//!   `created_at` from `ctime`.
18
19use chrono::{DateTime, Utc};
20use serde::{Deserialize, Serialize};
21use std::collections::HashMap;
22use std::time::SystemTime;
23
24/// Converts [`SystemTime`] to UTC [`DateTime`] for JSON serialization.
25///
26/// Returns [`None`] if the instant is before the Unix epoch or out of range for `chrono`.
27pub fn utc_from_system_time(st: SystemTime) -> Option<DateTime<Utc>> {
28    let duration = st.duration_since(std::time::UNIX_EPOCH).ok()?;
29    DateTime::from_timestamp(duration.as_secs() as i64, duration.subsec_nanos())
30}
31
/// Core document properties from DOCX `docProps/core.xml` (subset).
///
/// Every field is optional and is left out of the serialized JSON when it is `None`.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct DocxCore {
    /// Document creator (presumably the `creator` core property; confirm against the extractor).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub creator: Option<String>,
    /// Last person to modify the document, as recorded in the core properties.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub last_modified_by: Option<String>,
    /// Revision marker, kept as a string exactly as read (not parsed into a number here).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub revision: Option<String>,
}
42
/// PDF metadata subset from `lopdf`.
///
/// Every field is optional and is left out of the serialized JSON when it is `None`.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct PdfInfo {
    /// Number of pages in the document, when it could be determined.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub page_count: Option<u32>,
    /// Document author string, when present.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub author: Option<String>,
    /// Document title string, when present.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub title: Option<String>,
}
53
/// Loose EXIF / image metadata bag for JSON output.
///
/// A few well-known values get named fields; everything else travels in [`ExifInfo::extra`].
/// Named optional fields are left out of the serialized JSON when they are `None`.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct ExifInfo {
    /// Camera description string, when present.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub camera: Option<String>,
    /// Lens description string, when present.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub lens: Option<String>,
    /// GPS latitude (presumably signed decimal degrees; confirm against the producer).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub gps_lat: Option<f64>,
    /// GPS longitude (presumably signed decimal degrees; confirm against the producer).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub gps_lon: Option<f64>,
    /// Catch-all for additional tags. Because of `flatten`, these entries are written as sibling
    /// keys of the named fields (not under an `extra` object), and unrecognized keys found while
    /// deserializing are collected back into this map.
    #[serde(flatten)]
    pub extra: HashMap<String, serde_json::Value>,
}
68
/// Audio tags summary (from lofty).
///
/// Every field is optional and is left out of the serialized JSON when it is `None`.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct AudioInfo {
    /// Artist tag, when present.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub artist: Option<String>,
    /// Track title tag, when present.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub title: Option<String>,
    /// Album tag, when present.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub album: Option<String>,
}
79
/// Video stream summary from `ffprobe` JSON (minimal).
///
/// Every field is optional and is left out of the serialized JSON when it is `None`.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct VideoInfo {
    /// Codec name of the stream, when reported.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub codec_name: Option<String>,
    /// Frame width (presumably in pixels; confirm against the ffprobe mapping).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub width: Option<u32>,
    /// Frame height (presumably in pixels; confirm against the ffprobe mapping).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub height: Option<u32>,
    /// Duration in seconds, as the field name indicates; fractional values are allowed.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub duration_secs: Option<f64>,
}
92
/// Identity of a scanned file: where it is, how big it is, and its filesystem timestamps.
///
/// `path` and `size` are always serialized and declare no serde default, so they must be present
/// when deserializing. All other fields are optional and are left out of the JSON when `None`.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct IdentityInfo {
    /// Path of the file as a string.
    pub path: String,
    /// File size (presumably in bytes, taken from filesystem metadata; confirm against the scanner).
    pub size: u64,
    /// Last content modification time in UTC, when available.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub modified_at: Option<DateTime<Utc>>,
    /// Last access time in UTC, when the platform exposes it.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub accessed_at: Option<DateTime<Utc>>,
    /// Best-effort creation / birth time in UTC; this is not inode change time (`ctime`).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub created_at: Option<DateTime<Utc>>,
    /// Inode number, when known (presumably only populated on Unix-like platforms; verify).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub inode: Option<u64>,
    /// Identifier of the device holding the file, when known (platform support not shown here).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub device_id: Option<u64>,
}
108
/// Content hashes computed for a file, each stored as a string.
///
/// Every field is optional and is left out of the serialized JSON when it is `None`.
/// NOTE(review): the string encoding (presumably lowercase hex) is not established in this
/// file; confirm against the hashing code.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct HashesInfo {
    /// xxHash64 digest, when computed.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub xxhash64: Option<String>,
    /// MD5 digest, when computed.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub md5: Option<String>,
    /// SHA-256 digest, when computed.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub sha256: Option<String>,
    /// Label for how the hashes were produced (allowed values are defined elsewhere; TODO confirm).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub mode: Option<String>,
}
120
/// Result of file-format detection.
///
/// Every field is optional and is left out of the serialized JSON when it is `None`.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct FormatInfo {
    /// Detected format kind label (the vocabulary is defined by the detector, not in this file).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub kind: Option<String>,
    /// Detected MIME type string, when known.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub mime: Option<String>,
    /// Whether the file extension agrees with the detected format (presumably; verify with the
    /// detector). `None` means the comparison was not made.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub extension_match: Option<bool>,
    /// Detector confidence score (range is not established here; TODO confirm, e.g. 0.0..=1.0).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub confidence: Option<f64>,
}
132
/// Format-specific metadata sections attached to a record.
///
/// Each section is optional and is left out of the serialized JSON when it is `None`, so a
/// record only carries the sections that were populated for it.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct MetaInfo {
    /// 64-bit hash value (presumably a perceptual image hash, going by the name; confirm).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub phash: Option<u64>,
    /// EXIF / image metadata.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub exif: Option<ExifInfo>,
    /// Audio tag summary.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub audio: Option<AudioInfo>,
    /// Video stream summary.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub video: Option<VideoInfo>,
    /// PDF metadata subset.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub pdf: Option<PdfInfo>,
    /// DOCX core properties subset.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub docx: Option<DocxCore>,
}
148
/// Top-level serializable result for one scanned file.
///
/// `schema_version` and `identity` are always serialized and declare no serde default, so they
/// must be present when deserializing. The remaining sections are optional and are left out of
/// the JSON when `None`.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct FileRecord {
    /// Version number of this record layout.
    pub schema_version: u32,
    /// Path, size, and filesystem timestamps of the file.
    pub identity: IdentityInfo,
    /// Content hashes, when any were computed.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub hashes: Option<HashesInfo>,
    /// Format detection result, when available.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub format: Option<FormatInfo>,
    /// Format-specific metadata sections, when available.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub meta: Option<MetaInfo>,
    /// Error message associated with this file, if any (presumably set when scanning failed).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub error: Option<String>,
}
162
#[cfg(test)]
mod tests {
    use super::*;
    use chrono::TimeZone;
    use std::collections::HashMap;

    /// A populated `modified_at` must serialize as an RFC3339 UTC string ending in `Z` and
    /// deserialize back to the same instant.
    #[test]
    fn serialized_modified_at_is_rfc3339_utc_z() {
        let fixed = Utc.with_ymd_and_hms(2026, 4, 6, 12, 34, 56).unwrap();
        let record = FileRecord {
            schema_version: 1,
            identity: IdentityInfo {
                path: "/tmp/x".into(),
                size: 0,
                modified_at: Some(fixed),
                ..Default::default()
            },
            ..Default::default()
        };
        let json = serde_json::to_string(&record).unwrap();
        assert!(json.contains("\"schema_version\":1"));
        assert!(json.contains("\"identity\""));
        // Pin the key to its exact value: checking for a bare 'Z' anywhere in the document would
        // also pass if some unrelated string happened to contain that letter.
        assert!(
            json.contains("\"modified_at\":\"2026-04-06T12:34:56Z\""),
            "unexpected modified_at encoding in {json}"
        );
        let parsed: FileRecord = serde_json::from_str(&json).unwrap();
        assert_eq!(parsed.identity.modified_at, Some(fixed));
    }

    /// Keys stored in the flattened `extra` map must survive a serialize/deserialize round trip.
    #[test]
    fn exif_unknown_key_roundtrip_survives() {
        let mut extra = HashMap::new();
        extra.insert(
            "ImageDescription".to_string(),
            serde_json::json!("sample description"),
        );
        let record = FileRecord {
            schema_version: 1,
            identity: IdentityInfo {
                path: "/tmp/x.jpg".into(),
                size: 1,
                ..Default::default()
            },
            meta: Some(MetaInfo {
                exif: Some(ExifInfo {
                    camera: Some("A".into()),
                    lens: None,
                    gps_lat: None,
                    gps_lon: None,
                    extra,
                }),
                ..Default::default()
            }),
            ..Default::default()
        };
        let json = serde_json::to_string(&record).unwrap();
        let parsed: FileRecord = serde_json::from_str(&json).unwrap();
        let exif = parsed.meta.unwrap().exif.unwrap();
        assert_eq!(
            exif.extra.get("ImageDescription"),
            Some(&serde_json::json!("sample description"))
        );
    }

    /// A line carrying only the required fields must deserialize; optional sections are absent.
    #[test]
    fn json_deserializes_minimal_ndjson_line() {
        let json = r#"{"schema_version":1,"identity":{"path":"a","size":2}}"#;
        let rec: FileRecord = serde_json::from_str(json).unwrap();
        assert_eq!(rec.identity.path, "a");
        assert_eq!(rec.identity.size, 2);
    }

    /// A large (75 KiB) `error` string must deserialize intact.
    #[test]
    fn json_deserializes_large_error_field() {
        let big = "a".repeat(75 * 1024);
        let json = format!(
            r#"{{"schema_version":1,"identity":{{"path":"/tmp/x","size":1}},"error":{}}}"#,
            serde_json::to_string(&big).unwrap()
        );
        let rec: FileRecord = serde_json::from_str(&json).unwrap();
        assert_eq!(rec.identity.path, "/tmp/x");
        assert_eq!(rec.identity.size, 1);
        assert_eq!(rec.error.as_deref(), Some(big.as_str()));
    }
}