dezoomify_rs/nypl/
mod.rs

1use std::sync::Arc;
2use std::iter::successors;
3use std::fmt::{Debug, Formatter};
4use std::collections::HashMap;
5
6use custom_error::custom_error;
7use regex::Regex;
8use serde::Deserialize;
9
10use crate::dezoomer::{TilesRect, Dezoomer, DezoomerInput, ZoomLevels, DezoomerError, IntoZoomLevels, DezoomerInputWithContents, TileReference};
11use crate::json_utils::number_or_string;
12use crate::Vec2d;
13
/// Dezoomer for zoomable images of the NYPL digital collections.
///
/// The type carries no state: it is a unit struct that can be created with
/// [`Default::default`].
#[derive(Default)]
pub struct NYPLImage;
17
/// Start of the URL of an item page on the NYPL digital collections website.
const NYPL_IMAGE_VIEW_PREFIX: &str = "https://digitalcollections.nypl.org/items/";
/// Start of the URL under which the tile metadata and the tiles are served.
const NYPL_META_PREFIX: &str = "https://access.nypl.org/image.php/";
/// End of the tile metadata URL; it follows the image identifier.
const NYPL_META_POSTFIX: &str = "/tiles/config.js";
21
22fn get_image_id_from_meta_url(meta_url: &str) -> String {
23    meta_url.replace(NYPL_META_PREFIX, "")
24        .replace(NYPL_META_POSTFIX, "")
25}
26
27fn parse_image_id(image_view_url: &str) -> Option<String> {
28    Regex::new(r"https://digitalcollections.nypl.org/items/([a-f0-9\-]+)").unwrap()
29        .captures(image_view_url)
30        .and_then(|cap| cap.get(1))
31        .map(|m| m.as_str().to_string())
32}
33
34impl Dezoomer for NYPLImage {
35    fn name(&self) -> &'static str { "nypl" }
36    fn zoom_levels(&mut self, data: &DezoomerInput) -> Result<ZoomLevels, DezoomerError> {
37        if data.uri.starts_with(NYPL_IMAGE_VIEW_PREFIX) {
38            let image_view_url = data.uri.as_str();
39            let image_id = parse_image_id(image_view_url).ok_or_else(||
40                DezoomerError::wrap(NYPLError::NoIdInUrl { url: image_view_url.to_string() })
41            )?;
42            let meta_uri = format!("{}{}{}", NYPL_META_PREFIX, image_id, NYPL_META_POSTFIX);
43            Err(DezoomerError::NeedsData { uri: meta_uri })
44        } else {
45            self.assert(data.uri.contains(NYPL_META_PREFIX))?;
46            let DezoomerInputWithContents { uri, contents } = data.with_contents()?;
47            let iter = iter_levels(uri, contents).map_err(DezoomerError::wrap)?;
48            Ok(iter.into_zoom_levels())
49        }
50    }
51}
52
/// Builds an endless iterator of reference-counted handles to one value.
///
/// `v` is converted into an `Arc<U>` exactly once; every item produced by the
/// iterator is a clone of that `Arc`, so all items point to the same
/// allocation.
fn arcs<T, U: ?Sized>(v: T) -> impl Iterator<Item=Arc<U>>
    where Arc<U>: From<T> {
    let shared: Arc<U> = v.into();
    successors(Some(shared), |handle| Some(Arc::clone(handle)))
}
57
58fn iter_levels(uri: &str, contents: &[u8])
59               -> Result<impl Iterator<Item=Level> + 'static, NYPLError> {
60    if contents.is_empty() {
61        return Err(NYPLError::NoMetadata);
62    }
63    let base = get_image_id_from_meta_url(uri);
64    let mut meta_map: MetadataRoot = serde_json::from_slice(contents)?;
65    let (_, meta) = meta_map.configs.drain()
66        .find(|(k, _v)| k == "0")
67        .ok_or(NYPLError::NoMetadata)?;
68
69    let level_count: u32 = meta.level_count();
70    let levels =
71        (0..=level_count).zip(arcs(base)).zip(arcs(meta))
72            .map(|((level, base), metadata)|
73                Level { metadata, base, level });
74    Ok(levels)
75}
76
77#[derive(PartialEq, Eq)]
78struct Level {
79    metadata: Arc<Metadata>,
80    base: Arc<str>,
81    level: u32,
82}
83
84impl Debug for Level {
85    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
86        write!(f, "NYPL Image")
87    }
88}
89
90impl TilesRect for Level {
91    fn size(&self) -> Vec2d {
92        let reverse_level = self.metadata.level_count() - self.level;
93        Vec2d::from(self.metadata.size) / 2_u32.pow(reverse_level)
94    }
95
96    fn tile_size(&self) -> Vec2d { Vec2d::square(self.metadata.tile_size) }
97
98    fn tile_url(&self, Vec2d { x, y }: Vec2d) -> String {
99        format!("https://access.nypl.org/image.php/{id}/tiles/0/{level}/{x}_{y}.{format}",
100                id = self.base,
101                level = self.level,
102                x = x,
103                y = y,
104                format = self.metadata.format,
105        )
106    }
107
108    fn tile_ref(&self, pos: Vec2d) -> TileReference {
109        let delta = Vec2d {
110            x: if pos.x == 0 { 0 } else { self.metadata.overlap },
111            y: if pos.y == 0 { 0 } else { self.metadata.overlap },
112        };
113        TileReference {
114            url: self.tile_url(pos),
115            position: self.tile_size() * pos - delta,
116        }
117    }
118}
119
120#[derive(Debug, PartialEq, Eq, Deserialize)]
121pub struct MetadataRoot {
122    configs: HashMap<String, Metadata>,
123}
124
125#[derive(Debug, PartialEq, Eq, Deserialize)]
126pub struct Metadata {
127    size: MetadataSize,
128    #[serde(alias = "tilesize", deserialize_with = "number_or_string")]
129    tile_size: u32,
130    format: String,
131    #[serde(default = "Default::default", deserialize_with = "number_or_string")]
132    overlap: u32,
133}
134
135impl Metadata {
136    fn level_count(&self) -> u32 {
137        let max_dim: u32 = self.size.width.max(self.size.height);
138        32 - max_dim.leading_zeros()
139    }
140}
141
142impl From<MetadataSize> for Vec2d {
143    fn from(s: MetadataSize) -> Self {
144        Vec2d { x: s.width, y: s.height }
145    }
146}
147
148#[derive(Debug, PartialEq, Eq, Clone, Copy, Deserialize)]
149struct MetadataSize {
150    #[serde(deserialize_with = "number_or_string")]
151    width: u32,
152    #[serde(deserialize_with = "number_or_string")]
153    height: u32,
154}
155
156custom_error! {pub NYPLError
157    JsonError{resp: String} = "Failed to parse NYPL Image meta as json, \
158        got content(blank shows the site has no zoom function for this one):\n {resp}",
159    Utf8{source: std::str::Utf8Error} = "Invalid NYPL metadata file: {source}",
160    NoIdInUrl{url: String} = "Unable to extract an image id from {url:?}",
161    BadMetadata{source: serde_json::Error} = "Invalid nypl metadata: {source}",
162    NoMetadata = "No metadata found. This image is probably not tiled, \
163    and you can download it directly by right-clicking on it from \
164    your browser without any external tool.",
165}
166
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses a sample metadata file, then checks the metadata and first tile
    /// URL of the last level, and the id extraction from an item page URL.
    #[test]
    fn test_parse_metadata() {
        let config_json = r#"
        {
          "configs":{
            "0":{
              "size":{
                "width":"2422",
                "height":"3000"
              },
              "tilesize":"256",
              "overlap":"2",
              "format":"png"
            },
            "90":{
              "size":{
                "width":"3000",
                "height":"2422"
              },
              "tilesize":"256",
              "overlap":"2",
              "format":"png"
            },
            "180":{
              "size":{
                "width":"2422",
                "height":"3000"
              },
              "tilesize":"256",
              "overlap":"2",
              "format":"png"
            },
            "270":{
              "size":{
                "width":"3000",
                "height":"2422"
              },
              "tilesize":"256",
              "overlap":"2",
              "format":"png"
            }
          }
        }
        "#.as_bytes();
        // A bare id has neither the metadata prefix nor postfix, so it is used as is.
        let image_id = "a28d6e6b-b317-f008-e040-e00a1806635d";
        let last_level: Level = iter_levels(image_id, config_json)
            .unwrap()
            .last()
            .unwrap();

        let expected_metadata = Metadata {
            size: MetadataSize { width: 2422, height: 3000 },
            tile_size: 256,
            format: "png".to_string(),
            overlap: 2,
        };
        assert_eq!(last_level.metadata, Arc::new(expected_metadata));

        let expected_url = "https://access.nypl.org/image.php/\
            a28d6e6b-b317-f008-e040-e00a1806635d\
            /tiles/0/12/0_0.png";
        let first_tile = Vec2d { x: 0, y: 0 };
        assert_eq!(last_level.tile_url(first_tile), expected_url);

        let parsed_id = parse_image_id(
            "https://digitalcollections.nypl.org/items/a14f3200-fac1-012f-f7a4-58d385a7bbd0#item-data"
        );
        assert_eq!(
            parsed_id.unwrap(),
            "a14f3200-fac1-012f-f7a4-58d385a7bbd0",
        )
    }
}