Skip to main content

simple_gal/
metadata.rs

1//! Image metadata extraction and resolution.
2//!
3//! Each image can carry metadata (title, description) from two independent sources:
4//!
5//! ## Filesystem sources (read during scan phase)
6//!
7//! - **Title**: Derived from the filename stem via the `NNN-name` convention.
8//!   `001-My-Photo.jpg` becomes "My Photo". Simple, requires no tooling, and
9//!   consistent with album and page naming.
10//!
11//! - **Description**: Read from a sidecar text file with the same stem as the image.
12//!   `001-My-Photo.txt` alongside `001-My-Photo.jpg`. Follows the same pattern
13//!   as `info.txt` for album descriptions — plain text, no special format.
14//!
15//! ## Embedded metadata sources (read during process phase)
16//!
17//! - **Title**: IPTC Object Name (`IPTC:2:05`). This is the "Title" field in
18//!   Lightroom, Capture One, and most DAM (Digital Asset Management) software.
19//!
20//! - **Description**: IPTC Caption-Abstract (`IPTC:2:120`). The standard "Caption"
21//!   field in Lightroom. This is by far the most-used text metadata field among
22//!   photographers — "Headline" and "Extended Description" exist in the IPTC spec
23//!   but are journalism holdovers rarely used in fine art workflows.
24//!
25//! ## Resolution priority
26//!
27//! Each field is resolved independently. The first non-empty value wins:
28//!
29//! - **Title**: EXIF title → filename title → None
30//! - **Description**: sidecar `.txt` → EXIF caption → None
31//!
32//! The rationale: embedded metadata represents deliberate curation in a photography
33//! tool (the photographer typed it into Lightroom on purpose) and should win over
34//! mechanical filename extraction. For descriptions, sidecar files are explicit
35//! overrides — the user created a file on purpose — so they trump embedded metadata.
36//!
37//! ## Title sanitization
38//!
39//! Since resolved titles may end up in URLs and filenames (via the image page slug),
40//! EXIF-sourced titles are sanitized for safe use: truncated to a reasonable length,
41//! non-URL-safe characters replaced with dashes, consecutive dashes collapsed.
42//! This prevents filesystem errors from long titles and broken URLs from special
43//! characters.
44
45use std::path::Path;
46
/// Pick the winning value for a metadata field out of several candidates.
///
/// `sources` lists the candidates from highest to lowest priority. Each
/// present candidate is trimmed of surrounding whitespace, and the first one
/// that is still non-empty after trimming is returned as an owned `String`.
/// Returns `None` when no candidate qualifies, which includes an empty
/// `sources` slice.
///
/// ```text
/// title:       resolve(&[exif_title,   filename_title])
/// description: resolve(&[sidecar_text, exif_caption])
/// ```
pub fn resolve(sources: &[Option<&str>]) -> Option<String> {
    // `flatten` drops the `None` entries, leaving only the present candidates
    // in their original priority order.
    for candidate in sources.iter().copied().flatten() {
        let text = candidate.trim();
        if !text.is_empty() {
            return Some(text.to_owned());
        }
    }
    None
}
67
/// Load the description sidecar that accompanies an image, if there is one.
///
/// The sidecar path is the image path with its extension swapped for `txt`,
/// so `content/album/001-photo.jpg` maps to `content/album/001-photo.txt`.
/// The file's contents are returned with surrounding whitespace trimmed.
///
/// Returns `None` when the sidecar cannot be read as text (for example
/// because it does not exist) or when it holds nothing but whitespace.
pub fn read_sidecar(image_path: &Path) -> Option<String> {
    let sidecar_path = image_path.with_extension("txt");

    let contents = match std::fs::read_to_string(sidecar_path) {
        Ok(text) => text,
        // Any read failure is treated the same as "no sidecar present".
        Err(_) => return None,
    };

    let description = contents.trim();
    if description.is_empty() {
        None
    } else {
        Some(description.to_string())
    }
}
80
/// Upper bound on the length of a slug returned by [`sanitize_slug`].
///
/// Slugs are pure ASCII, so this is both a byte count and a character count.
const MAX_SLUG_LEN: usize = 80;

/// Sanitize a title string for use in URLs and filenames.
///
/// - Every character other than an ASCII letter, an ASCII digit, or `-` is
///   replaced with `-` (non-ASCII letters such as `é` are replaced too)
/// - Consecutive dashes are collapsed into one
/// - Leading and trailing dashes are stripped
/// - Results longer than `MAX_SLUG_LEN` are cut at the last dash that keeps
///   the slug within the limit; a word that ends exactly at the limit is
///   kept. If there is no such dash, the slug is cut at `MAX_SLUG_LEN`.
///
/// The returned slug never starts or ends with a dash. It may be empty when
/// the title contains no ASCII letters or digits.
pub fn sanitize_slug(title: &str) -> String {
    // Single pass: keep safe characters, turn everything else into a dash,
    // and never emit a dash at the start or directly after another dash.
    let mut slug = String::with_capacity(title.len());
    for c in title.chars() {
        if c.is_ascii_alphanumeric() {
            slug.push(c);
        } else if !slug.is_empty() && !slug.ends_with('-') {
            slug.push('-');
        }
    }

    // At most one trailing dash can remain after the loop above.
    let trimmed = slug.trim_matches('-');

    if trimmed.len() <= MAX_SLUG_LEN {
        return trimmed.to_string();
    }

    // `trimmed` is pure ASCII, so every byte offset is a char boundary and
    // the slices below cannot panic. The search window extends one byte past
    // the limit: a dash at index `MAX_SLUG_LEN` means the first
    // `MAX_SLUG_LEN` characters already end on a complete word.
    let cut = trimmed[..=MAX_SLUG_LEN]
        .rfind('-')
        .unwrap_or(MAX_SLUG_LEN);
    trimmed[..cut].to_string()
}
130
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use tempfile::TempDir;

    /// File name of the placeholder image used by the sidecar tests.
    const IMAGE_NAME: &str = "001-photo.jpg";
    /// File name of the sidecar that belongs to `IMAGE_NAME`.
    const SIDECAR_NAME: &str = "001-photo.txt";

    /// Creates a temp directory holding a placeholder image plus a sidecar
    /// file containing `sidecar_text`.
    ///
    /// The `TempDir` guard is returned alongside the image path so the
    /// directory stays alive while the calling test runs.
    fn image_with_sidecar(sidecar_text: &str) -> (TempDir, std::path::PathBuf) {
        let dir = TempDir::new().unwrap();
        let img = dir.path().join(IMAGE_NAME);
        let txt = dir.path().join(SIDECAR_NAME);
        fs::write(&img, b"fake image").unwrap();
        fs::write(&txt, sidecar_text).unwrap();
        (dir, img)
    }

    /// Asserts that `sanitize_slug(input) == expected` for every
    /// `(input, expected)` pair.
    fn assert_slugs(cases: &[(&str, &str)]) {
        for &(input, expected) in cases {
            assert_eq!(sanitize_slug(input), expected, "input: {:?}", input);
        }
    }

    // -------------------------------------------------------------------------
    // resolve()
    // -------------------------------------------------------------------------

    #[test]
    fn resolve_picks_first_non_none() {
        let winner = resolve(&[Some("EXIF Title"), Some("Filename Title")]);
        assert_eq!(winner.as_deref(), Some("EXIF Title"));
    }

    #[test]
    fn resolve_skips_none() {
        let winner = resolve(&[None, Some("Fallback")]);
        assert_eq!(winner.as_deref(), Some("Fallback"));
    }

    #[test]
    fn resolve_skips_empty_strings() {
        let winner = resolve(&[Some(""), Some("Fallback")]);
        assert_eq!(winner.as_deref(), Some("Fallback"));
    }

    #[test]
    fn resolve_skips_whitespace_only() {
        let winner = resolve(&[Some("  \n\t  "), Some("Fallback")]);
        assert_eq!(winner.as_deref(), Some("Fallback"));
    }

    #[test]
    fn resolve_returns_none_when_all_none() {
        assert!(resolve(&[None, None]).is_none());
    }

    #[test]
    fn resolve_returns_none_for_empty_sources() {
        assert!(resolve(&[]).is_none());
    }

    #[test]
    fn resolve_trims_whitespace() {
        let winner = resolve(&[Some("  Padded Title  ")]);
        assert_eq!(winner.as_deref(), Some("Padded Title"));
    }

    // -------------------------------------------------------------------------
    // read_sidecar()
    // -------------------------------------------------------------------------

    #[test]
    fn read_sidecar_finds_matching_txt() {
        let (_dir, img) = image_with_sidecar("A beautiful sunset over the mountains");
        assert_eq!(
            read_sidecar(&img).as_deref(),
            Some("A beautiful sunset over the mountains")
        );
    }

    #[test]
    fn read_sidecar_returns_none_when_no_file() {
        // Neither the image nor the sidecar is created here.
        let dir = TempDir::new().unwrap();
        let img = dir.path().join(IMAGE_NAME);
        assert!(read_sidecar(&img).is_none());
    }

    #[test]
    fn read_sidecar_returns_none_for_empty_file() {
        let (_dir, img) = image_with_sidecar("");
        assert!(read_sidecar(&img).is_none());
    }

    #[test]
    fn read_sidecar_returns_none_for_whitespace_only() {
        let (_dir, img) = image_with_sidecar("   \n  \t  ");
        assert!(read_sidecar(&img).is_none());
    }

    #[test]
    fn read_sidecar_trims_content() {
        let (_dir, img) = image_with_sidecar("\n  Some description  \n");
        assert_eq!(read_sidecar(&img).as_deref(), Some("Some description"));
    }

    // -------------------------------------------------------------------------
    // sanitize_slug()
    // -------------------------------------------------------------------------

    #[test]
    fn sanitize_slug_alphanumeric_passthrough() {
        assert_slugs(&[("hello-world", "hello-world"), ("Photo123", "Photo123")]);
    }

    #[test]
    fn sanitize_slug_replaces_spaces_and_special_chars() {
        assert_slugs(&[
            ("My Great Photo!", "My-Great-Photo"),
            ("Hello World", "Hello-World"),
            ("foo@bar#baz", "foo-bar-baz"),
        ]);
    }

    #[test]
    fn sanitize_slug_collapses_consecutive_dashes() {
        assert_slugs(&[
            ("a---b", "a-b"),
            ("a - b", "a-b"),
            ("hello   world", "hello-world"),
        ]);
    }

    #[test]
    fn sanitize_slug_strips_leading_trailing_dashes() {
        assert_slugs(&[("--hello--", "hello"), ("  hello  ", "hello"), ("---", "")]);
    }

    #[test]
    fn sanitize_slug_truncates_long_titles() {
        let long_title = "a-".repeat(50); // 100 chars
        let slug = sanitize_slug(&long_title);
        assert!(slug.len() <= MAX_SLUG_LEN);
        assert!(!slug.ends_with('-'));
    }

    #[test]
    fn sanitize_slug_truncates_at_word_boundary() {
        // 91 chars; the last dash inside the first 80 sits just before
        // "truncated", so that word must not survive.
        let title = "this-is-a-very-long-title-that-exceeds-the-maximum-slug-length-and-should-be-truncated-here";
        let slug = sanitize_slug(title);
        assert!(slug.len() <= MAX_SLUG_LEN);
        assert!(!slug.contains("truncated"));
    }

    #[test]
    fn sanitize_slug_handles_unicode() {
        assert_slugs(&[("café", "caf"), ("日本語", ""), ("München", "M-nchen")]);
    }

    #[test]
    fn sanitize_slug_empty_for_all_special_chars() {
        assert_slugs(&[("@#$%", ""), ("!!!", "")]);
    }

    #[test]
    fn sanitize_slug_preserves_existing_dashes() {
        assert_slugs(&[("my-photo-title", "my-photo-title")]);
    }
}