Skip to main content

superstac_core/models/
catalog.rs

1use std::collections::{HashMap, HashSet};
2use std::str::FromStr;
3use std::time::Duration;
4
5use chrono::{DateTime, Utc};
6use serde::{Deserialize, Deserializer, Serialize};
7
8use crate::errors::{SuperSTACError, ValidationError};
9
10use crate::utils::{get_date_time, parse_url, validate_identifier};
11
/// How often to poll a catalog's health endpoint.
///
/// YAML accepts a named variant (`minutely`, `hourly`, ...) or a
/// `Custom` duration string like `"15s"`, `"30m"`, `"2h"`.
///
/// `Serialize` is derived, while `Deserialize` is implemented by hand and
/// reads a single string.
/// NOTE(review): the derived serialization of `Custom(Duration)` is presumably
/// not a plain string, so a serialized `Custom` value likely cannot be read
/// back by the hand-written `Deserialize` impl — confirm before relying on
/// round-tripping.
#[derive(Clone, Debug, Serialize, Default, PartialEq)]
#[serde(rename_all = "lowercase")]
pub enum HealthCheckFrequencyStrategy {
    /// Poll every 60 seconds.
    Minutely,
    /// Poll every hour. This is the `Default` variant.
    #[default]
    Hourly,
    /// Poll every 24 hours.
    Daily,
    /// Poll every 7 days.
    Weekly,
    /// Poll every 30 days (`as_duration` uses a fixed 30-day month).
    Monthly,
    /// Poll at an explicit, caller-supplied interval.
    Custom(Duration),
}
27
28/// Per-catalog overrides. Anything unset falls back to [`Default`].
29#[derive(Clone, Debug, Serialize, Deserialize)]
30pub struct CatalogSettings {
31    pub health_check_strategy: HealthCheckFrequencyStrategy,
32    /// Status codes considered "healthy" (inclusive). Default `(200, 299)`.
33    pub healthy_status_code_range: (u16, u16),
34}
35
36impl Default for CatalogSettings {
37    fn default() -> Self {
38        CatalogSettings {
39            health_check_strategy: HealthCheckFrequencyStrategy::Hourly,
40            healthy_status_code_range: (200, 299),
41        }
42    }
43}
44impl HealthCheckFrequencyStrategy {
45    pub fn as_duration(&self) -> Duration {
46        match self {
47            HealthCheckFrequencyStrategy::Minutely => Duration::from_secs(60),
48            HealthCheckFrequencyStrategy::Hourly => Duration::from_secs(60 * 60),
49            HealthCheckFrequencyStrategy::Daily => Duration::from_secs(60 * 60 * 24),
50            HealthCheckFrequencyStrategy::Weekly => Duration::from_secs(60 * 60 * 24 * 7),
51            HealthCheckFrequencyStrategy::Monthly => Duration::from_secs(60 * 60 * 24 * 30),
52            HealthCheckFrequencyStrategy::Custom(dur) => *dur,
53        }
54    }
55}
56
57impl<'de> Deserialize<'de> for HealthCheckFrequencyStrategy {
58    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
59    where
60        D: Deserializer<'de>,
61    {
62        let s = String::deserialize(deserializer)?;
63        let s = s.trim().to_lowercase();
64
65        match s.as_str() {
66            "minutely" => Ok(HealthCheckFrequencyStrategy::Minutely),
67            "hourly" => Ok(HealthCheckFrequencyStrategy::Hourly),
68            "daily" => Ok(HealthCheckFrequencyStrategy::Daily),
69            "weekly" => Ok(HealthCheckFrequencyStrategy::Weekly),
70            "monthly" => Ok(HealthCheckFrequencyStrategy::Monthly),
71            _ => {
72                // Parse custom duration like "15m", "30s", "1h"
73                let dur = if s.ends_with("s") {
74                    let n = &s[..s.len() - 1]
75                        .parse::<u64>()
76                        .map_err(serde::de::Error::custom)?;
77                    Duration::from_secs(*n)
78                } else if s.ends_with("m") {
79                    let n = &s[..s.len() - 1]
80                        .parse::<u64>()
81                        .map_err(serde::de::Error::custom)?;
82                    Duration::from_secs(*n * 60)
83                } else if s.ends_with("h") {
84                    let n = &s[..s.len() - 1]
85                        .parse::<u64>()
86                        .map_err(serde::de::Error::custom)?;
87                    Duration::from_secs(*n * 3600)
88                } else {
89                    return Err(serde::de::Error::custom(format!("Invalid duration: {}", s)));
90                };
91                Ok(HealthCheckFrequencyStrategy::Custom(dur))
92            }
93        }
94    }
95}
96
97impl FromStr for HealthCheckFrequencyStrategy {
98    type Err = String;
99
100    fn from_str(s: &str) -> Result<Self, Self::Err> {
101        let s = s.trim();
102        if s.ends_with("s") {
103            let secs = s[..s.len() - 1]
104                .parse::<u64>()
105                .map_err(|_| "Invalid seconds")?;
106            Ok(HealthCheckFrequencyStrategy::Custom(Duration::from_secs(
107                secs,
108            )))
109        } else if s.ends_with("m") {
110            let mins = s[..s.len() - 1]
111                .parse::<u64>()
112                .map_err(|_| "Invalid minutes")?;
113            Ok(HealthCheckFrequencyStrategy::Custom(Duration::from_secs(
114                mins * 60,
115            )))
116        } else if s.ends_with("h") {
117            let hours = s[..s.len() - 1]
118                .parse::<u64>()
119                .map_err(|_| "Invalid hours")?;
120            Ok(HealthCheckFrequencyStrategy::Custom(Duration::from_secs(
121                hours * 3600,
122            )))
123        } else {
124            Err(format!("Invalid custom duration: {}", s))
125        }
126    }
127}
128
/// Health-check state recorded for a catalog.
///
/// The derived `Default` yields an empty endpoint, `available = false`, no
/// `last_checked` and status code `0`; `get_default_health_status` starts
/// from different values (catalog URL, current time, `200`).
#[derive(Clone, Debug, Serialize, Deserialize, Default)]
pub struct HealthStatus {
    /// Endpoint the status refers to; initialised to the catalog URL.
    pub endpoint: String,
    /// Whether the catalog is considered up. Starts as `false` for a new catalog.
    pub available: bool,
    /// Timestamp attached to this status, if any.
    pub last_checked: Option<DateTime<Utc>>,
    /// The status code response from the STAC Catalog Server.
    pub status_code: u16,
}
137
138fn get_default_health_status(url: String) -> HealthStatus {
139    HealthStatus {
140        // defaults to false. Always assumes the health status is down. It will be updated after the first health check.
141        available: false,
142        // Defaults to the catalog url.
143        endpoint: url,
144        last_checked: Some(get_date_time()),
145        status_code: 200,
146    }
147}
/// The capabilities of the STAC Catalog.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct CatalogCapabilities {
    // Private field: values of this type can only be built inside this
    // module or through deserialization.
    // NOTE(review): presumably describes the catalog's filtering support —
    // the expected values are not visible here; confirm.
    filtering: String,
}
153
/// A data structure for a STAC catalog from the YAML file.
///
/// This is the raw, unvalidated shape; it is turned into a [`Catalog`] via
/// `TryFrom`, which is where the id and URL are checked.
#[derive(Debug, Deserialize)]
pub struct CatalogConfig {
    /// Catalog identifier; checked with `validate_identifier` during conversion.
    pub id: String,
    /// Provider id from the YAML entry.
    /// NOTE(review): the `TryFrom<CatalogConfig>` conversion does not copy
    /// this into `Catalog::provider` — confirm whether that is intentional.
    pub provider: Option<String>,
    /// Optional human-readable title.
    pub title: Option<String>,
    /// Catalog URL. Optional in the schema, but the conversion into
    /// [`Catalog`] needs it to be present and parseable.
    pub url: Option<String>,
    /// Optional free-text description.
    pub description: Option<String>,
    /// Per-catalog settings; `CatalogSettings::default()` is used when absent.
    pub settings: Option<CatalogSettings>,
    /// Maps canonical collection IDs to this catalog's local collection IDs.
    /// E.g. `sentinel-2-l2a: S2MSI2A` on a CDSE-style catalog.
    pub collection_aliases: Option<HashMap<String, String>>,
    /// Per-collection asset rename rules, keyed by canonical collection ID.
    /// Inner map: canonical asset key -> this catalog's local asset key.
    /// E.g. `{ "sentinel-2-l2a": { "blue": "B02", "green": "B03" } }`.
    pub asset_aliases: Option<HashMap<String, HashMap<String, String>>>,
}
171
172impl TryFrom<CatalogConfig> for Catalog {
173    type Error = SuperSTACError;
174
175    fn try_from(cfg: CatalogConfig) -> Result<Self, Self::Error> {
176        validate_identifier(&cfg.id)?;
177
178        let url = match cfg.url {
179            Some(w) => {
180                parse_url(&w).map_err(|e| ValidationError::InvalidUrl(e.to_string()))?;
181                Some(w)
182            }
183            None => None,
184        };
185
186        Ok(Self {
187            id: cfg.id,
188            provider: None,
189            title: cfg.title,
190            url: url.clone().unwrap(),
191            description: cfg.description,
192            settings: cfg.settings.unwrap_or(CatalogSettings::default()),
193            health_status: get_default_health_status(url.unwrap()),
194            capabilities: None,
195            collection_aliases: cfg.collection_aliases.unwrap_or_default(),
196            asset_aliases: cfg.asset_aliases.unwrap_or_default(),
197            supported_collections: None,
198            created_at: Some(get_date_time()),
199            updated_at: None,
200        })
201    }
202}
203
/// A STAC catalog endpoint registered with superstac.
///
/// Construct via [`Catalog::new`] for direct use, or by loading a YAML
/// `superstac.yml` (which deserializes [`CatalogConfig`] and converts via
/// `TryFrom`). `health_status` and `supported_collections` are managed by
/// the engine — don't set them manually.
#[derive(Clone, Debug, Serialize, Deserialize, Default)]
pub struct Catalog {
    /// Catalog identifier; checked with `validate_identifier` on construction.
    pub id: String,
    /// Linked provider id, or `None` if standalone.
    pub provider: Option<String>,
    /// Optional human-readable title.
    pub title: Option<String>,
    /// Catalog URL; checked with `parse_url` on construction and update.
    pub url: String,
    /// Optional free-text description.
    pub description: Option<String>,
    /// Per-catalog overrides for health check frequency, healthy status range, etc.
    pub settings: CatalogSettings,
    /// Updated by the engine's health monitor — don't set manually.
    pub health_status: HealthStatus,
    /// Capabilities of the catalog; `None` on a freshly constructed catalog.
    pub capabilities: Option<CatalogCapabilities>,
    /// Maps canonical collection IDs (e.g. `sentinel-2-l2a`) to this catalog's
    /// local collection IDs (e.g. `S2MSI2A`). Empty map means pass-through.
    #[serde(default)]
    pub collection_aliases: HashMap<String, String>,
    /// Per-collection asset rename rules, keyed by canonical collection ID.
    /// Inner map: canonical asset key -> this catalog's local asset key.
    /// Empty means pass-through (catalog already uses canonical names).
    #[serde(default)]
    pub asset_aliases: HashMap<String, HashMap<String, String>>,
    /// The set of canonical collection IDs this catalog is known to support.
    /// `None` means not yet introspected — searches pass through to this catalog.
    /// `Some(set)` is authoritative — searches for collections outside the set
    /// will skip this catalog.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub supported_collections: Option<HashSet<String>>,
    /// Auto-populated on creation.
    pub created_at: Option<DateTime<Utc>>,
    /// Auto-populated on every update.
    pub updated_at: Option<DateTime<Utc>>,
}
243
244impl Catalog {
245    /// Validate and construct a catalog. Errors if the id contains non-ASCII
246    /// characters or the URL doesn't parse.
247    pub fn new(
248        id: &str,
249        title: Option<impl Into<String>>, 
250        url: &str,
251        description: Option<impl Into<String>>,
252        settings: Option<CatalogSettings>,
253    ) -> Result<Self, SuperSTACError> {
254        
255        validate_identifier(&id)?;
256        
257        let valid_url = parse_url(url)
258            .map_err(|err| SuperSTACError::from(ValidationError::InvalidUrl(err.to_string())))?;
259
260        let url_string = valid_url.to_string();
261
262        Ok(Self {
263            id: id.to_string(),
264            provider: None,
265            title: title.map(|t| t.into()),
266            url: url_string.clone(),
267            description: description.map(|d| d.into()),
268            settings: settings.unwrap_or_default(),
269            health_status: get_default_health_status(url_string),
270            capabilities: None,
271            collection_aliases: HashMap::new(),
272            asset_aliases: HashMap::new(),
273            supported_collections: None,
274            created_at: Some(get_date_time()),
275            updated_at: None,
276        })
277    }
278
279    /// Builder-style: attach collection aliases.
280    pub fn with_collection_aliases(mut self, aliases: HashMap<String, String>) -> Self {
281        self.collection_aliases = aliases;
282        self
283    }
284
285    /// Returns this catalog's local collection name for `canonical`, or
286    /// `canonical` itself when no alias is configured.
287    pub fn resolve_collection<'a>(&'a self, canonical: &'a str) -> &'a str {
288        self.collection_aliases
289            .get(canonical)
290            .map(String::as_str)
291            .unwrap_or(canonical)
292    }
293
294    /// Inverse of `resolve_collection`: returns the canonical collection name
295    /// for the given catalog-local name. Falls back to `local` when no alias
296    /// maps to it.
297    pub fn canonical_collection<'a>(&'a self, local: &'a str) -> &'a str {
298        for (canonical, l) in &self.collection_aliases {
299            if l == local {
300                return canonical;
301            }
302        }
303        local
304    }
305
306    /// Returns true if this catalog should be queried for the given canonical
307    /// collection IDs. Catalogs with `supported_collections = None` (not yet
308    /// introspected) pass through. Empty `requested` (no collection filter)
309    /// matches every catalog.
310    pub fn supports_any_of(&self, requested: &[String]) -> bool {
311        match &self.supported_collections {
312            None => true,
313            Some(set) => requested.is_empty() || requested.iter().any(|c| set.contains(c)),
314        }
315    }
316
317    /// Stamp `updated_at` with the current time.
318    pub fn set_update_date(&mut self) {
319        self.updated_at = Some(get_date_time());
320    }
321
322    /// Apply a partial update. `None` fields are left alone.
323    pub fn update(
324        &mut self,
325        description: Option<String>,
326        url: Option<String>,
327        title: Option<String>,
328        settings: Option<CatalogSettings>,
329    ) -> Result<(), ValidationError> {
330        if let Some(updated_url) = url {
331            self.url = parse_url(updated_url.as_str())
332                .map_err(|err| ValidationError::InvalidUrl(err.to_string()))?
333                .to_string();
334        }
335
336        self.title = title;
337        self.description = description;
338
339        if let Some(updated_settings) = settings {
340            self.settings = updated_settings;
341        }
342
343        self.set_update_date();
344        Ok(())
345    }
346
347    pub fn set_id(&mut self, id: String) {
348        self.id = id
349    }
350
351    pub fn set_provider(&mut self, provider: &str) {
352        self.provider = Some(provider.to_owned())
353    }
354}
355
356#[derive(Clone, Debug, Serialize, Deserialize)]
357pub struct CatalogFilters {
358    /// Performs an exact match on the `id` field.
359    pub id: Option<String>,
360    /// Performs a string search on the catalog provider's name and description.
361    pub provider: Option<String>,
362    /// Performs a string search on the catalog title.
363    pub title: Option<String>,
364    /// Performs a string search on the catalog description.
365    pub description: Option<String>,
366    /// Performs a boolean search on the catalog health status.
367    pub available: Option<bool>,
368
369    /// Performs a date search on the `created_at` field. Filters for for date `after` the provided date.
370    pub created_after: Option<DateTime<Utc>>,
371    /// Performs a date search on the `created_at` field. Filters for for date `before` the provided date.
372    pub created_before: Option<DateTime<Utc>>,
373    /// Performs a date search on the `updated_at` field. Filters for for date `after` the provided date.
374    pub updated_after: Option<DateTime<Utc>>,
375    /// Performs a date search on the `updated_at` field. Filters for for date `before` the provided date.
376    pub updated_before: Option<DateTime<Utc>>,
377}
378
379impl Default for CatalogFilters {
380    fn default() -> Self {
381        CatalogFilters {
382            id: None,
383            provider: None,
384            title: None,
385            description: None,
386            available: None,
387            created_after: None,
388            created_before: None,
389            updated_after: None,
390            updated_before: None,
391        }
392    }
393}
394
/// Partial-update payload for a catalog; every field is optional.
///
/// NOTE(review): `Catalog::update` takes description, url, title and
/// settings but no provider, so `provider` is presumably applied separately
/// (e.g. via `Catalog::set_provider`) — confirm against the caller.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct CatalogUpdate {
    /// Attach a provider to a catalog using the provider id.
    pub provider: Option<String>,
    /// Updates the catalog title.
    pub title: Option<String>,
    /// Updates the catalog description.
    pub description: Option<String>,
    /// Updates the catalog url.
    pub url: Option<String>,
    /// Updates the catalog settings.
    pub settings: Option<CatalogSettings>,
}