sdml_parse/
load.rs

1/*!
2This module contains implementations of the [`ModuleResolver`] and [`ModuleLoader`] traits for
3file-system based modules.
4*/
5
6use crate::parse::parse_str;
7use codespan_reporting::files::SimpleFiles;
8use sdml_core::load::{ModuleLoader, ModuleResolver};
9use sdml_core::model::identifiers::Identifier;
10use sdml_core::model::modules::{HeaderValue, Module};
11use sdml_core::model::{HasName, HasSourceSpan};
12use sdml_core::stdlib;
13use sdml_core::store::ModuleStore;
14use sdml_errors::diagnostics::reporter::ReportCounters;
15use sdml_errors::diagnostics::SeverityFilter;
16use sdml_errors::diagnostics::{functions::imported_module_not_found, StandardStreamReporter};
17use sdml_errors::{Diagnostic, Reporter, Source, SourceFiles};
18use sdml_errors::{Error, FileId};
19use search_path::SearchPath;
20use serde::{Deserialize, Serialize};
21use std::collections::HashMap;
22use std::fs::File;
23use std::io::Read;
24use std::path::{Path, PathBuf};
25use tracing::{debug, error, info, trace, warn};
26use url::Url;
27
28// ------------------------------------------------------------------------------------------------
29// Public Types
30// ------------------------------------------------------------------------------------------------
31
///
/// The resolver implements the logic to map module identifiers to file system paths using the
/// environment variable `SDML_PATH` to contain a search path.
///
/// When a catalog is present it is consulted first by [`FsModuleResolver::name_to_path`]; the
/// search path is only used for names the catalog does not resolve.
///
#[derive(Clone, Debug)]
pub struct FsModuleResolver {
    /// Optional resolver catalog; `None` when no catalog could be loaded.
    catalog: Option<ModuleCatalog>,
    /// The ordered list of directories searched for module files.
    search_path: SearchPath,
}
41
/// The name of the SDML environment variable that may be used to hold a load path.
pub const SDML_RESOLVER_PATH_VARIABLE: &str = "SDML_PATH";

/// The recommended file extension for SDML resources; the resolver tries this extension before
/// [`SDML_FILE_EXTENSION_LONG`].
pub const SDML_FILE_EXTENSION: &str = "sdm";

/// The alternate file extension for SDML resources, tried by the resolver only after
/// [`SDML_FILE_EXTENSION`].
pub const SDML_FILE_EXTENSION_LONG: &str = "sdml";

/// The name used for resolver catalog files; this is the file name looked for inside a directory
/// by [`ModuleCatalog::load_from`].
pub const SDML_CATALOG_FILE_NAME: &str = "sdml-catalog.json";
53
///
/// The loader is used to manage the process of creating an in-memory model from file-system resources.
///
/// A Module Loader is therefore responsible for:
///
/// 1. finding the resource that contains a module definition,
/// 2. parsing the source into an in-memory representation,
/// 3. caching the loaded module, and its source, for future use.
///
#[derive(Debug)]
pub struct FsModuleLoader {
    /// Resolver used to turn a module name into a file-system path.
    resolver: FsModuleResolver,
    /// Maps the name of each parsed module to the file id its source was registered under.
    module_file_ids: HashMap<Identifier, usize>,
    /// The source text of every file handed to the parser, indexed by file id.
    module_files: SourceFiles,
    /// Sink for diagnostics emitted while loading.
    reporter: Box<dyn Reporter>,
}
70
71// ------------------------------------------------------------------------------------------------
72
///
/// This type represents the content of a resolver file.
///
/// The catalog maps module names to a URL (relative to `base`) and to a local path (relative to
/// the directory the catalog file was loaded from).
///
#[derive(Clone, Debug, Deserialize, Serialize)]
#[serde(rename_all = "snake_case")]
pub struct ModuleCatalog {
    /// The base URL onto which every entry's relative URL is joined.
    base: Url,
    /// The directory containing the catalog file; never (de)serialized, it is filled in after a
    /// catalog has been read from disk.
    #[serde(skip)]
    loaded_from: PathBuf,
    /// Top-level entries. An item is looked up by module name; a group is searched through its
    /// own entries, so a group's key is not used during resolution.
    entries: HashMap<String, CatalogEntry>,
}
84
///
/// An entry in a resolver catalog file is either an item or group of items.
///
/// With the `snake_case` renaming the serialized variant tags are `group` and `item`.
///
#[derive(Clone, Debug, Deserialize, Serialize)]
#[serde(rename_all = "snake_case")]
pub enum CatalogEntry {
    /// A set of items sharing an optional common relative URL and relative path.
    Group(Group),
    /// A single module's relative URL and relative path.
    Item(Item),
}
94
///
/// A resolver group allows the common configuration of multiple items.
///
#[derive(Clone, Debug, Deserialize, Serialize)]
#[serde(rename_all = "snake_case")]
pub struct Group {
    /// Optional URL segment joined onto the catalog base before an item's own relative URL;
    /// omitted from the serialized form when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    relative_url: Option<String>,
    /// Optional directory joined onto the catalog directory before an item's own relative path;
    /// omitted from the serialized form when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    relative_path: Option<PathBuf>,
    /// The items in this group, keyed by module name.
    entries: HashMap<String, Item>,
}
107
///
/// A specific resolver item.
///
#[derive(Clone, Debug, Deserialize, Serialize)]
#[serde(rename_all = "snake_case")]
pub struct Item {
    /// URL of the module, relative to the catalog (or enclosing group) base URL.
    relative_url: String,
    /// Path of the module's file, relative to the catalog (or enclosing group) directory.
    relative_path: PathBuf,
}
117
118// ------------------------------------------------------------------------------------------------
119// Private Macros
120// ------------------------------------------------------------------------------------------------
121
// Emit a trace-level "function entered" event and open a tracing span named
// `<type_name>::<fn_name>`.
//
// The macro expands to statements in the calling block, so the span guard bound to `_enter_span`
// is presumably held until that block ends (NOTE(review): confirm against `tracing` span guard
// semantics).
//
// Forms:
//   trace_entry!("Type", "function");
//   trace_entry!("Type", "function" => "format string", arg1, arg2, ...);
macro_rules! trace_entry {
    // No-argument form: logs `Type::function()`.
    ($type_name: literal, $fn_name: literal) => {
        const FULL_NAME: &str = concat!($type_name, "::", $fn_name);
        let tracing_span = ::tracing::trace_span!(FULL_NAME);
        let _enter_span = tracing_span.enter();
        ::tracing::trace!("{FULL_NAME}()");
    };
    // Argument form: the values are rendered with `$format` and logged as
    // `Type::function(<rendered arguments>)`.
    ($type_name: literal, $fn_name: literal => $format: literal, $( $value: expr ),+ ) => {
        const FULL_NAME: &str = concat!($type_name, "::", $fn_name);
        let tracing_span = ::tracing::trace_span!(FULL_NAME);
        let _enter_span = tracing_span.enter();
        // Formatted eagerly, before the trace event is emitted.
        let arguments = format!($format, $( $value ),+);
        ::tracing::trace!("{FULL_NAME}({arguments})");
    };
}
137
138// ------------------------------------------------------------------------------------------------
139// Implementations
140// ------------------------------------------------------------------------------------------------
141
142impl Default for FsModuleResolver {
143    fn default() -> Self {
144        trace_entry!("ModuleResolver", "default");
145
146        // 1. Use the standard environment variable as a search path
147        let mut search_path = SearchPath::new_or_default(SDML_RESOLVER_PATH_VARIABLE);
148
149        // 2. Add the current directory to the search path
150        search_path.prepend_cwd();
151
152        // 3. Load any catalog file found in the search path
153        let catalog = ModuleCatalog::load_from_current(true);
154
155        let _self = Self {
156            catalog,
157            search_path,
158        };
159
160        trace!("=> {:?}", _self);
161        _self
162    }
163}
164
165impl ModuleResolver for FsModuleResolver {
166    fn name_to_resource(&self, name: &Identifier, from: Option<FileId>) -> Result<Url, Error> {
167        Url::from_file_path(self.name_to_path(name, from)?)
168            .map_err(|_| Error::UrlParseError { source: None })
169    }
170}
171
172impl FsModuleResolver {
173    /// Add the provided path to the beginning of the search list.
174    pub fn prepend_to_search_path(&mut self, path: &Path) {
175        self.search_path.append(PathBuf::from(path));
176    }
177
178    /// Add the provided path to the end of the search list.
179    pub fn append_to_search_path(&mut self, path: &Path) {
180        self.search_path.append(PathBuf::from(path));
181    }
182
183    /// Return a file system path for the resource that /should/ contain the named module.
184    pub fn name_to_path(&self, name: &Identifier, from: Option<FileId>) -> Result<PathBuf, Error> {
185        trace_entry!("ModuleResolver", "name_to_path" => "{}", name);
186        if let Some(catalog) = &self.catalog {
187            let name: String = name.to_string();
188            if let Some(path) = catalog.resolve_local_path(&name) {
189                trace!("Found module in catalog, path: {path:?}");
190                return Ok(path);
191            }
192        }
193        self.search_path
194            .find(format!("{}.{}", name, SDML_FILE_EXTENSION).as_ref())
195            .or_else(|| {
196                self.search_path
197                    .find(format!("{}/{}.{}", name, name, SDML_FILE_EXTENSION).as_ref())
198                    .or_else(|| {
199                        self.search_path
200                            .find(format!("{}.{}", name, SDML_FILE_EXTENSION_LONG).as_ref())
201                            .or_else(|| {
202                                self.search_path.find(
203                                    format!("{}/{}.{}", name, name, SDML_FILE_EXTENSION_LONG)
204                                        .as_ref(),
205                                )
206                            })
207                    })
208            })
209            .ok_or_else(|| {
210                imported_module_not_found(
211                    from.unwrap_or_default(),
212                    name.source_span().map(|span| span.into()),
213                    name,
214                )
215                .into()
216            })
217    }
218}
219
220// ------------------------------------------------------------------------------------------------
221
222impl Default for FsModuleLoader {
223    fn default() -> Self {
224        Self {
225            resolver: Default::default(),
226            module_file_ids: Default::default(),
227            module_files: SimpleFiles::new(),
228            reporter: Box::<StandardStreamReporter>::default(),
229        }
230    }
231}
232
233impl ModuleLoader for FsModuleLoader {
234    fn load(
235        &mut self,
236        name: &Identifier,
237        from: Option<FileId>,
238        cache: &mut impl ModuleStore,
239        recursive: bool,
240    ) -> Result<Identifier, Error> {
241        trace_entry!("ModuleLoader", "load" => "{}", name);
242        if stdlib::get_library_module_implementation(name).is_some() {
243            Ok(name.clone())
244        } else {
245            let file = match self.resolver.name_to_path(name, from) {
246                Ok(f) => f,
247                Err(Error::LanguageValidationError { source }) => {
248                    self.report(&source)?;
249                    return Err(source.into());
250                }
251                Err(e) => return Err(e),
252            };
253            self.load_from_file(file, cache, recursive)
254        }
255    }
256
257    fn resolver(&self) -> &impl ModuleResolver {
258        &self.resolver
259    }
260
261    fn get_file_id(&self, name: &Identifier) -> Option<sdml_errors::FileId> {
262        self.module_file_ids.get(name).copied()
263    }
264
265    fn get_source(&self, file_id: FileId) -> Option<Source> {
266        match self.files().get(file_id) {
267            Ok(file) => Some(file.source().clone()),
268            Err(err) => {
269                error!("Could not retrieve module: {file_id:?}, error: {err}");
270                None
271            }
272        }
273    }
274
275    fn report(&self, diagnostic: &Diagnostic) -> Result<(), Error> {
276        self.reporter.emit(diagnostic, self.files())
277    }
278
279    fn reporter_done(&self, top_module_name: Option<String>) -> Result<ReportCounters, Error> {
280        self.reporter.done(top_module_name)
281    }
282
283    fn set_severity_filter(&mut self, filter: SeverityFilter) {
284        self.reporter.set_severity_filter(filter);
285    }
286}
287
288impl FsModuleLoader {
289    pub fn with_resolver(self, resolver: FsModuleResolver) -> Self {
290        Self { resolver, ..self }
291    }
292
293    pub fn with_reporter(self, reporter: Box<dyn Reporter>) -> Self {
294        Self { reporter, ..self }
295    }
296
297    /// Load a module from the source in `file`.
298    pub fn load_from_file(
299        &mut self,
300        file: PathBuf,
301        cache: &mut impl ModuleStore,
302        recursive: bool,
303    ) -> Result<Identifier, Error> {
304        trace_entry!("ModuleLoader", "load_from_file" => "{:?}", file);
305        let mut reader = File::open(&file)?;
306        let catalog = self.resolver.catalog.clone();
307        let module_name = self.load_inner(&mut reader, Some(file.clone()), cache, recursive)?;
308        let module = cache.get_mut(&module_name).unwrap();
309        module.set_source_file(file.clone());
310        if !module.has_base_uri() {
311            self.set_base_uri(module, &file, &catalog)?;
312        }
313        Ok(module_name)
314    }
315
316    fn set_base_uri(
317        &self,
318        module: &mut Module,
319        file: &Path,
320        catalog: &Option<ModuleCatalog>,
321    ) -> Result<(), Error> {
322        if let Some(catalog) = catalog {
323            let name = module.name().to_string();
324            if let Some(url) = catalog.resolve_uri(&name) {
325                module.set_base_uri(HeaderValue::from(url));
326                return Ok(());
327            }
328        }
329
330        let file = file.canonicalize()?;
331        match Url::parse(&format!(
332            "http://example.org{}/",
333            file.to_string_lossy().as_ref()
334        )) {
335            Ok(base) => module.set_base_uri(HeaderValue::from(base)),
336            Err(_) => warn!("Could not construct a base URI"),
337        }
338        Ok(())
339    }
340
341    /// Load a module reading the source from `reader`.
342    pub fn load_from_reader(
343        &mut self,
344        reader: &mut dyn Read,
345        cache: &mut impl ModuleStore,
346        recursive: bool,
347    ) -> Result<Identifier, Error> {
348        trace_entry!("ModuleLoader", "load_from_reader");
349        self.load_inner(reader, None, cache, recursive)
350    }
351
352    fn load_inner(
353        &mut self,
354        reader: &mut dyn Read,
355        file: Option<PathBuf>,
356        cache: &mut impl ModuleStore,
357        recursive: bool,
358    ) -> Result<Identifier, Error> {
359        trace!("ModuleLoader::load_inner(..., {file:?}, ..., {recursive})");
360        let mut source = String::new();
361        reader.read_to_string(&mut source)?;
362        let file_name: String = file
363            .map(|p| p.to_string_lossy().into_owned())
364            .unwrap_or_default();
365        let file_id = self.module_files.add(file_name, source.into());
366
367        let module = parse_str(file_id, self)?;
368
369        let name = module.name().clone();
370
371        let _ = self.module_file_ids.insert(name.clone(), file_id);
372
373        cache.insert(module);
374
375        if recursive {
376            let dependencies = {
377                let module = cache.get(&name).unwrap();
378                module
379                    .imported_modules()
380                    .into_iter()
381                    .cloned()
382                    .collect::<Vec<Identifier>>()
383            };
384            for name in &dependencies {
385                if !cache.contains(name) {
386                    debug!("didn't find module {name} in cache, loading");
387                    // TODO: this bails on the first missing import, is that what we want?
388                    self.load(name, Some(file_id), cache, recursive)?;
389                } else {
390                    debug!("found module {name} in cache");
391                }
392            }
393        }
394        Ok(name)
395    }
396
397    #[inline(always)]
398    pub(crate) fn files(&self) -> &SimpleFiles<String, Source> {
399        &self.module_files
400    }
401}
402
403// ------------------------------------------------------------------------------------------------
404
405impl ModuleCatalog {
406    ///
407    /// Load a resolver catalog file from the current directory.
408    ///
409    /// If the parameter `look_in_parents` is `true` this will check parent directories.
410    ///
411    pub fn load_from_current(look_in_parents: bool) -> Option<Self> {
412        trace!("ModuleCatalog::load_from_current({look_in_parents})");
413        let cwd = std::env::current_dir().unwrap_or_else(|_| PathBuf::from("."));
414        Self::load_from(&cwd, look_in_parents)
415    }
416
417    ///
418    /// Load a resolver catalog file from the `path`.
419    ///
420    /// If the parameter `look_in_parents` is `true` this will check parent directories.
421    ///
422    pub fn load_from(path: &Path, look_in_parents: bool) -> Option<Self> {
423        trace!("ModuleCatalog::load_from({path:?}, {look_in_parents})");
424        if path.is_file() {
425            Self::load_from_file(path)
426        } else if path.is_dir() {
427            let file = path.join(SDML_CATALOG_FILE_NAME);
428            if file.is_file() {
429                Self::load_from_file(&file)
430            } else if look_in_parents {
431                if let Some(parent_path) = path.parent() {
432                    Self::load_from(parent_path, look_in_parents)
433                } else {
434                    warn!("No catalog file found in file-system parent path");
435                    None
436                }
437            } else {
438                warn!("No catalog found in provided directory");
439                None
440            }
441        } else {
442            warn!("The provided path was not a file or directory");
443            None
444        }
445    }
446
447    ///
448    /// Load from the `file` path, this has been found by one of the methods above and so it should
449    /// exist.
450    ///
451    fn load_from_file(file: &Path) -> Option<Self> {
452        match std::fs::read_to_string(file) {
453            Ok(source) => match serde_json::from_str::<ModuleCatalog>(&source) {
454                Ok(mut catalog) => {
455                    catalog.loaded_from = file.parent().unwrap().to_path_buf();
456                    info!("Loaded catalog, file: {file:?}");
457                    Some(catalog)
458                }
459                Err(e) => {
460                    error!("Error parsing catalog, file: {file:?}, error: {e}");
461                    None
462                }
463            },
464            Err(e) => {
465                error!("Error reading catalog, file: {file:?}, error: {e}");
466                None
467            }
468        }
469    }
470
471    // --------------------------------------------------------------------------------------------
472
473    pub fn base(&self) -> &Url {
474        &self.base
475    }
476
477    pub fn set_base(&mut self, base: Url) {
478        self.base = base;
479    }
480
481    // --------------------------------------------------------------------------------------------
482
483    pub fn loaded_from(&self) -> &PathBuf {
484        &self.loaded_from
485    }
486
487    // --------------------------------------------------------------------------------------------
488
489    pub fn has_entries(&self) -> bool {
490        !self.entries.is_empty()
491    }
492
493    pub fn get_entry(&self, key: &String) -> Option<&CatalogEntry> {
494        self.entries.get(key)
495    }
496
497    pub fn entries_contains_key(&self, key: &String) -> bool {
498        self.entries.contains_key(key)
499    }
500
501    pub fn entries(&self) -> impl Iterator<Item = (&String, &CatalogEntry)> {
502        self.entries.iter()
503    }
504
505    pub fn entry_keys(&self) -> impl Iterator<Item = &String> {
506        self.entries.keys()
507    }
508
509    pub fn entry_values(&self) -> impl Iterator<Item = &CatalogEntry> {
510        self.entries.values()
511    }
512
513    // --------------------------------------------------------------------------------------------
514
515    pub fn groups(&self) -> impl Iterator<Item = (&String, &Group)> {
516        self.entries()
517            .filter_map(|(k, e)| e.as_group().map(|group| (k, group)))
518    }
519
520    // --------------------------------------------------------------------------------------------
521
522    pub fn items(&self) -> impl Iterator<Item = (&String, &Item)> {
523        self.entries()
524            .filter_map(|(k, e)| e.as_item().map(|item| (k, item)))
525    }
526
527    // --------------------------------------------------------------------------------------------
528
529    pub fn resolve_uri(&self, module: &String) -> Option<Url> {
530        if let Some(CatalogEntry::Item(item)) = self.get_entry(module) {
531            Some(self.base.join(item.relative_url().as_str()).unwrap())
532        } else {
533            self.groups()
534                .find(|(_, g)| g.entries_contains_key(module))
535                .map(|(_, g)| g.resolve_uri(&self.base, module))
536                .unwrap_or_default()
537        }
538    }
539
540    pub fn resolve_local_path(&self, module: &String) -> Option<PathBuf> {
541        if let Some(CatalogEntry::Item(item)) = self.get_entry(module) {
542            Some(self.loaded_from.join(item.relative_path()))
543        } else {
544            self.groups()
545                .find(|(_, g)| g.entries_contains_key(module))
546                .map(|(_, g)| g.resolve_local_path(&self.loaded_from, module))
547                .unwrap_or_default()
548        }
549    }
550}
551
552impl From<Group> for CatalogEntry {
553    fn from(value: Group) -> Self {
554        Self::Group(value)
555    }
556}
557
558impl From<Item> for CatalogEntry {
559    fn from(value: Item) -> Self {
560        Self::Item(value)
561    }
562}
563
564impl CatalogEntry {
565    pub fn is_group(&self) -> bool {
566        matches!(self, Self::Group(_))
567    }
568    pub fn as_group(&self) -> Option<&Group> {
569        match self {
570            Self::Group(v) => Some(v),
571            _ => None,
572        }
573    }
574
575    // --------------------------------------------------------------------------------------------
576
577    pub fn is_item(&self) -> bool {
578        matches!(self, Self::Item(_))
579    }
580    pub fn as_item(&self) -> Option<&Item> {
581        match self {
582            Self::Item(v) => Some(v),
583            _ => None,
584        }
585    }
586}
587
588impl Group {
589    pub fn relative_path(&self) -> Option<&PathBuf> {
590        self.relative_path.as_ref()
591    }
592    pub fn set_relative_path(&mut self, relative_path: PathBuf) {
593        self.relative_path = Some(relative_path);
594    }
595    pub fn unset_relative_path(&mut self) {
596        self.relative_path = None;
597    }
598
599    // --------------------------------------------------------------------------------------------
600
601    pub fn relative_url(&self) -> Option<&String> {
602        self.relative_url.as_ref()
603    }
604    pub fn set_relative_url(&mut self, relative_url: String) {
605        self.relative_url = Some(relative_url);
606    }
607    pub fn unset_relative_url(&mut self) {
608        self.relative_url = None;
609    }
610
611    // --------------------------------------------------------------------------------------------
612
613    pub fn has_entries(&self) -> bool {
614        !self.entries.is_empty()
615    }
616
617    pub fn get_entry(&self, key: &String) -> Option<&Item> {
618        self.entries.get(key)
619    }
620
621    pub fn entries_contains_key(&self, key: &String) -> bool {
622        self.entries.contains_key(key)
623    }
624
625    pub fn entries(&self) -> impl Iterator<Item = (&String, &Item)> {
626        self.entries.iter()
627    }
628
629    pub fn entry_keys(&self) -> impl Iterator<Item = &String> {
630        self.entries.keys()
631    }
632
633    pub fn entry_values(&self) -> impl Iterator<Item = &Item> {
634        self.entries.values()
635    }
636
637    // --------------------------------------------------------------------------------------------
638
639    pub fn resolve_uri(&self, base: &Url, module: &String) -> Option<Url> {
640        let base = if let Some(relative_url) = &self.relative_url {
641            base.join(relative_url.as_str()).unwrap()
642        } else {
643            base.clone()
644        };
645        self.get_entry(module)
646            .map(|item| base.join(item.relative_url().as_str()).unwrap())
647    }
648
649    pub fn resolve_local_path(&self, base: &Path, module: &String) -> Option<PathBuf> {
650        let base = if let Some(group_base) = &self.relative_path {
651            base.join(group_base)
652        } else {
653            base.to_path_buf()
654        };
655        self.get_entry(module)
656            .map(|item| base.join(item.relative_url().as_str()))
657    }
658}
659
660impl Item {
661    pub fn relative_path(&self) -> &PathBuf {
662        &self.relative_path
663    }
664
665    pub fn set_relative_path(&mut self, relative_path: PathBuf) {
666        self.relative_path = relative_path;
667    }
668
669    // --------------------------------------------------------------------------------------------
670
671    pub fn relative_url(&self) -> &String {
672        &self.relative_url
673    }
674
675    pub fn set_relative_url(&mut self, relative_url: String) {
676        self.relative_url = relative_url;
677    }
678}
679
680// ------------------------------------------------------------------------------------------------
681// Unit Tests
682// ------------------------------------------------------------------------------------------------
683
#[cfg(test)]
mod tests {
    use super::*;

    /// Serializing a hand-built catalog must succeed; the JSON is printed for inspection.
    #[test]
    fn test_generate_catalog() {
        let catalog = ModuleCatalog {
            base: Url::parse("https://example.org/schema/").unwrap(),
            loaded_from: PathBuf::from("."),
            entries: vec![(
                String::from("rentals"),
                CatalogEntry::Item(Item {
                    relative_url: String::from("rentals/v1/"),
                    relative_path: PathBuf::from("examples/rentals.sdm"),
                }),
            )]
            .into_iter()
            .collect(),
        };
        println!("{}", serde_json::to_string_pretty(&catalog).unwrap());
    }

    /// A catalog with a single top-level item parses and exposes that item.
    #[test]
    fn test_parse_catalog() {
        let catalog: ModuleCatalog = serde_json::from_str(
            r#"{
  "base": "https://example.org/rentals/",
  "entries": {
    "vehicle": {
      "item": {
        "relative_url": "vehicle#",
        "relative_path": "vehicle-v1.sdm"
      }
    }
  }
}"#,
        )
        .unwrap();

        let key = String::from("vehicle");
        assert!(catalog.entries_contains_key(&key));
        let item = catalog
            .get_entry(&key)
            .and_then(CatalogEntry::as_item)
            .expect("the `vehicle` entry should be an item");
        assert_eq!(item.relative_url().as_str(), "vehicle#");
        assert_eq!(item.relative_path(), &PathBuf::from("vehicle-v1.sdm"));
    }

    /// A catalog with a group parses and the group's shared settings are populated.
    ///
    /// The group key was previously spelled `relative_name`, an unknown field that serde
    /// silently ignores, so the group's `relative_url` was never actually parsed.
    #[test]
    fn test_parse_catalog_with_group() {
        let catalog: ModuleCatalog = serde_json::from_str(
            r#"{
  "base": "https://example.org/rentals/",
  "entries": {
    "rentals": {
      "group": {
        "relative_url": "entities/",
        "relative_path": "/entities-v1",
        "entries": {
            "vehicle": {
              "relative_url": "vehicle#",
              "relative_path": "vehicle-v1.sdm"
          }
        }
      }
    }
  }
}"#,
        )
        .unwrap();

        let (name, group) = catalog
            .groups()
            .next()
            .expect("the catalog should contain one group");
        assert_eq!(name.as_str(), "rentals");
        assert_eq!(group.relative_url().map(String::as_str), Some("entities/"));
        assert_eq!(group.relative_path(), Some(&PathBuf::from("/entities-v1")));
        assert!(group.entries_contains_key(&String::from("vehicle")));
    }
}