sdml_parse/
load.rs

1/*!
2This module contains implementations of the [`ModuleResolver`] and [`ModuleLoader`] traits for
3file-system based modules.
4*/
5
6use crate::parse::parse_str;
7use codespan_reporting::files::SimpleFiles;
8use sdml_core::load::{ModuleLoader, ModuleResolver};
9use sdml_core::model::identifiers::Identifier;
10use sdml_core::model::modules::HeaderValue;
11use sdml_core::model::{HasName, HasSourceSpan};
12use sdml_core::stdlib;
13use sdml_core::store::ModuleStore;
14use sdml_errors::diagnostics::reporter::ReportCounters;
15use sdml_errors::diagnostics::SeverityFilter;
16use sdml_errors::diagnostics::{functions::imported_module_not_found, StandardStreamReporter};
17use sdml_errors::{Diagnostic, Reporter, Source, SourceFiles};
18use sdml_errors::{Error, FileId};
19use search_path::SearchPath;
20use serde::{Deserialize, Serialize};
21use std::collections::HashMap;
22use std::env;
23use std::fs::File;
24use std::io::Read;
25use std::path::{Path, PathBuf};
26use tracing::{debug, error, info, trace, warn};
27use url::Url;
28
29// ------------------------------------------------------------------------------------------------
30// Public Types
31// ------------------------------------------------------------------------------------------------
32
///
/// The resolver implements the logic to map module identifiers to file system paths using the
/// environment variable `SDML_PATH` to contain a search path.
///
/// When a catalog is present it is consulted first; the search path is only probed when the
/// catalog does not resolve the requested module.
///
#[derive(Clone, Debug)]
pub struct FsModuleResolver {
    /// Optional module catalog; `None` when no catalog file could be loaded.
    catalog: Option<ModuleCatalog>,
    /// Locations probed for module source files when the catalog has no answer.
    search_path: SearchPath,
}
42
/// The name of the SDML environment variable that may be used to hold a load path.
///
/// [`FsModuleResolver::default`] builds its search path from this variable.
pub const SDML_RESOLVER_PATH_VARIABLE: &str = "SDML_PATH";

/// The recommended file extension for SDML resources.
///
/// The resolver tries this extension before [`SDML_FILE_EXTENSION_LONG`].
pub const SDML_FILE_EXTENSION: &str = "sdm";

/// The alternate file extension for SDML resources.
pub const SDML_FILE_EXTENSION_LONG: &str = "sdml";

/// The name used for resolver catalog files.
///
/// This is the file name [`ModuleCatalog::load_from`] looks for inside a directory.
pub const SDML_CATALOG_FILE_NAME: &str = "sdml-catalog.json";

/// The environment variable used to override resolver catalog file location.
///
/// When set, [`FsModuleResolver::default`] loads the catalog from the path it contains instead
/// of searching from the current directory.
pub const SDML_CATALOG_FILE_VARIABLE: &str = "SDML_CATALOG_FILE";
57
///
/// The loader is used to manage the process of creating an in-memory model from file-system resources.
///
/// A Module Loader is therefore responsible for:
///
/// 1. finding the resource that contains a module definition,
/// 2. parsing the source into an in-memory representation,
/// 3. caching the loaded module, and its source, for future use.
///
#[derive(Debug)]
pub struct FsModuleLoader {
    /// Maps module names to file-system paths.
    resolver: FsModuleResolver,
    /// Maps the name of each loaded module to the id of its entry in `module_files`.
    module_file_ids: HashMap<Identifier, usize>,
    /// Source text of every file (or reader) loaded so far.
    module_files: SourceFiles,
    /// Sink for diagnostics produced while loading.
    reporter: Box<dyn Reporter>,
}
74
75// ------------------------------------------------------------------------------------------------
76
///
/// This type represents the content of a resolver file.
///
/// A catalog maps module names to a URL (relative to `base`) and to a local file path
/// (relative to the directory the catalog was loaded from).
///
#[derive(Clone, Debug, Deserialize, Serialize)]
#[serde(rename_all = "snake_case")]
pub struct ModuleCatalog {
    /// Base URL against which the `relative_url` of entries is joined.
    base: Url,
    /// Directory containing the catalog file; never (de)serialized, it is filled in after the
    /// file has been read and is the base for resolving local paths.
    #[serde(skip)]
    loaded_from: PathBuf,
    /// Catalog entries; item entries are looked up here by module name.
    entries: HashMap<String, CatalogEntry>,
}
88
///
/// An entry in a resolver catalog file is either an item or group of items.
///
/// With the `snake_case` renaming the variants appear in JSON under the keys `group` and `item`.
///
#[derive(Clone, Debug, Deserialize, Serialize)]
#[serde(rename_all = "snake_case")]
pub enum CatalogEntry {
    /// A set of items sharing an optional URL and path prefix.
    Group(Group),
    /// A single module.
    Item(Item),
}
98
///
/// A resolver group allows the common configuration of multiple items.
///
#[derive(Clone, Debug, Deserialize, Serialize)]
#[serde(rename_all = "snake_case")]
pub struct Group {
    /// Optional URL segment joined to the catalog base before an item's own `relative_url`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    relative_url: Option<String>,
    /// Optional path segment joined to the catalog directory before an item's path.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    relative_path: Option<PathBuf>,
    /// The items in this group, keyed by module name.
    entries: HashMap<String, Item>,
}
111
///
/// A specific resolver item.
///
/// An item records where one module lives, both as a URL and as a local file.
///
#[derive(Clone, Debug, Deserialize, Serialize)]
#[serde(rename_all = "snake_case")]
pub struct Item {
    /// URL of the module, relative to the catalog base (and group URL, if any).
    relative_url: String,
    /// Path of the module source file, relative to the directory the catalog was loaded from
    /// (and group path, if any).
    relative_path: PathBuf,
}
121
122// ------------------------------------------------------------------------------------------------
123// Private Macros
124// ------------------------------------------------------------------------------------------------
125
// Emit function-entry tracing for `<type_name>::<fn_name>`.
//
// Both arms expand to statements in the calling function: they declare a `FULL_NAME` constant,
// create a trace-level span with that name, enter it, and emit one trace event. The guard
// returned by `enter()` is bound to a local so it is held until the enclosing scope ends.
//
// Forms:
//   trace_entry!("Type", "function");
//   trace_entry!("Type", "function" => "format string", arg, ...);
macro_rules! trace_entry {
    // No arguments to report: the event is rendered as `Type::function()`.
    ($type_name: literal, $fn_name: literal) => {
        const FULL_NAME: &str = concat!($type_name, "::", $fn_name);
        let tracing_span = ::tracing::trace_span!(FULL_NAME);
        let _enter_span = tracing_span.enter();
        ::tracing::trace!("{FULL_NAME}()");
    };
    // With arguments: they are formatted with `$format` and rendered as `Type::function(args)`.
    ($type_name: literal, $fn_name: literal => $format: literal, $( $value: expr ),+ ) => {
        const FULL_NAME: &str = concat!($type_name, "::", $fn_name);
        let tracing_span = ::tracing::trace_span!(FULL_NAME);
        let _enter_span = tracing_span.enter();
        let arguments = format!($format, $( $value ),+);
        ::tracing::trace!("{FULL_NAME}({arguments})");
    };
}
141
142// ------------------------------------------------------------------------------------------------
143// Implementations
144// ------------------------------------------------------------------------------------------------
145
146impl Default for FsModuleResolver {
147    fn default() -> Self {
148        trace_entry!("ModuleResolver", "default");
149
150        // 1. Use the standard environment variable as a search path
151        let mut search_path = SearchPath::new_or_default(SDML_RESOLVER_PATH_VARIABLE);
152
153        // 2. Add the current directory to the search path
154        search_path.prepend_cwd();
155
156        // 3. Load catalog file
157        let catalog = match env::var(SDML_CATALOG_FILE_VARIABLE) {
158            // If the environment variable is provided, load it from the location provided
159            Ok(catalog_file) => {
160                let catalog_file_path = PathBuf::from(catalog_file);
161                let module_catalog = ModuleCatalog::load_from_file(catalog_file_path.as_path());
162                if module_catalog.is_none() {
163                    error!("The path to module catalog was provided through environment variable, yet it failed to load.");
164                }
165                module_catalog
166            }
167            // If the environment variable is not provided, load it from the current directory (or any parent directory)
168            _ => ModuleCatalog::load_from_current(true),
169        };
170
171        let _self = Self {
172            catalog,
173            search_path,
174        };
175
176        trace!("=> {:?}", _self);
177        _self
178    }
179}
180
181impl ModuleResolver for FsModuleResolver {
182    fn name_to_resource(&self, name: &Identifier, from: Option<FileId>) -> Result<Url, Error> {
183        Url::from_file_path(self.name_to_path(name, from)?)
184            .map_err(|_| Error::UrlParseError { source: None })
185    }
186}
187
188impl FsModuleResolver {
189    /// Add the provided path to the beginning of the search list.
190    pub fn prepend_to_search_path(&mut self, path: &Path) {
191        self.search_path.append(PathBuf::from(path));
192    }
193
194    /// Add the provided path to the end of the search list.
195    pub fn append_to_search_path(&mut self, path: &Path) {
196        self.search_path.append(PathBuf::from(path));
197    }
198
199    /// Return a file system path for the resource that /should/ contain the named module.
200    pub fn name_to_path(&self, name: &Identifier, from: Option<FileId>) -> Result<PathBuf, Error> {
201        trace_entry!("ModuleResolver", "name_to_path" => "{}", name);
202        if let Some(catalog) = &self.catalog {
203            let name: String = name.to_string();
204            if let Some(path) = catalog.resolve_local_path(&name) {
205                trace!("Found module in catalog, path: {path:?}");
206                return Ok(path);
207            }
208        }
209        self.search_path
210            .find(format!("{}.{}", name, SDML_FILE_EXTENSION).as_ref())
211            .or_else(|| {
212                self.search_path
213                    .find(format!("{}/{}.{}", name, name, SDML_FILE_EXTENSION).as_ref())
214                    .or_else(|| {
215                        self.search_path
216                            .find(format!("{}.{}", name, SDML_FILE_EXTENSION_LONG).as_ref())
217                            .or_else(|| {
218                                self.search_path.find(
219                                    format!("{}/{}.{}", name, name, SDML_FILE_EXTENSION_LONG)
220                                        .as_ref(),
221                                )
222                            })
223                    })
224            })
225            .ok_or_else(|| {
226                imported_module_not_found(
227                    from.unwrap_or_default(),
228                    name.source_span().map(|span| span.into()),
229                    name,
230                )
231                .into()
232            })
233    }
234}
235
236// ------------------------------------------------------------------------------------------------
237
238impl Default for FsModuleLoader {
239    fn default() -> Self {
240        Self {
241            resolver: Default::default(),
242            module_file_ids: Default::default(),
243            module_files: SimpleFiles::new(),
244            reporter: Box::<StandardStreamReporter>::default(),
245        }
246    }
247}
248
249impl ModuleLoader for FsModuleLoader {
250    fn load(
251        &mut self,
252        name: &Identifier,
253        from: Option<FileId>,
254        cache: &mut impl ModuleStore,
255        recursive: bool,
256    ) -> Result<Identifier, Error> {
257        trace_entry!("ModuleLoader", "load" => "{}", name);
258        if stdlib::library_module(name).is_some() {
259            Ok(name.clone())
260        } else {
261            let file = match self.resolver.name_to_path(name, from) {
262                Ok(f) => f,
263                Err(Error::LanguageValidationError { source }) => {
264                    self.report(&source)?;
265                    return Err(source.into());
266                }
267                Err(e) => return Err(e),
268            };
269            self.load_from_file(file, cache, recursive)
270        }
271    }
272
273    fn resolver(&self) -> &impl ModuleResolver {
274        &self.resolver
275    }
276
277    fn get_file_id(&self, name: &Identifier) -> Option<sdml_errors::FileId> {
278        self.module_file_ids.get(name).copied()
279    }
280
281    fn get_source(&self, file_id: FileId) -> Option<Source> {
282        match self.files().get(file_id) {
283            Ok(file) => Some(file.source().clone()),
284            Err(err) => {
285                error!("Could not retrieve module: {file_id:?}, error: {err}");
286                None
287            }
288        }
289    }
290
291    fn report(&self, diagnostic: &Diagnostic) -> Result<(), Error> {
292        self.reporter.emit(diagnostic, self.files())
293    }
294
295    fn reporter_done(&self, top_module_name: Option<String>) -> Result<ReportCounters, Error> {
296        self.reporter.done(top_module_name)
297    }
298
299    fn set_severity_filter(&mut self, filter: SeverityFilter) {
300        self.reporter.set_severity_filter(filter);
301    }
302}
303
304impl FsModuleLoader {
305    pub fn with_resolver(self, resolver: FsModuleResolver) -> Self {
306        Self { resolver, ..self }
307    }
308
309    pub fn with_reporter(self, reporter: Box<dyn Reporter>) -> Self {
310        Self { reporter, ..self }
311    }
312
313    /// Load a module from the source in `file`.
314    pub fn load_from_file(
315        &mut self,
316        file: PathBuf,
317        cache: &mut impl ModuleStore,
318        recursive: bool,
319    ) -> Result<Identifier, Error> {
320        trace_entry!("ModuleLoader", "load_from_file" => "{:?}", file);
321        let mut reader = File::open(&file)?;
322        let catalog = self.resolver.catalog.clone();
323        let module_name = self.load_inner(&mut reader, Some(file.clone()), cache, recursive)?;
324        let module = cache.get_mut(&module_name).unwrap();
325        module.set_source_file(file.clone());
326        if !module.has_base_uri() {
327            if let Some(catalog) = catalog {
328                let name = module.name().to_string();
329                if let Some(url) = catalog.resolve_uri(&name) {
330                    module.set_base_uri(HeaderValue::from(url));
331                }
332            } else {
333                let file = file.canonicalize()?;
334                match Url::from_file_path(file) {
335                    Ok(base) => module.set_base_uri(HeaderValue::from(base)),
336                    Err(_) => warn!("Could not construct a base URI"),
337                }
338            }
339        }
340        Ok(module_name)
341    }
342
343    /// Load a module reading the source from `reader`.
344    pub fn load_from_reader(
345        &mut self,
346        reader: &mut dyn Read,
347        cache: &mut impl ModuleStore,
348        recursive: bool,
349    ) -> Result<Identifier, Error> {
350        trace_entry!("ModuleLoader", "load_from_reader");
351        self.load_inner(reader, None, cache, recursive)
352    }
353
354    fn load_inner(
355        &mut self,
356        reader: &mut dyn Read,
357        file: Option<PathBuf>,
358        cache: &mut impl ModuleStore,
359        recursive: bool,
360    ) -> Result<Identifier, Error> {
361        trace!("ModuleLoader::load_inner(..., {file:?}, ..., {recursive})");
362        let mut source = String::new();
363        reader.read_to_string(&mut source)?;
364        let file_name: String = file
365            .map(|p| p.to_string_lossy().into_owned())
366            .unwrap_or_default();
367        let file_id = self.module_files.add(file_name, source.into());
368
369        let module = parse_str(file_id, self)?;
370
371        let name = module.name().clone();
372
373        let _ = self.module_file_ids.insert(name.clone(), file_id);
374
375        cache.insert(module);
376
377        if recursive {
378            let dependencies = {
379                let module = cache.get(&name).unwrap();
380                module
381                    .imported_modules()
382                    .into_iter()
383                    .cloned()
384                    .collect::<Vec<Identifier>>()
385            };
386            for name in &dependencies {
387                if !cache.contains(name) {
388                    debug!("didn't find module {name} in cache, loading");
389                    // TODO: this bails on the first missing import, is that what we want?
390                    self.load(name, Some(file_id), cache, recursive)?;
391                } else {
392                    debug!("found module {name} in cache");
393                }
394            }
395        }
396        Ok(name)
397    }
398
399    #[inline(always)]
400    pub(crate) fn files(&self) -> &SimpleFiles<String, Source> {
401        &self.module_files
402    }
403}
404
405// ------------------------------------------------------------------------------------------------
406
407impl ModuleCatalog {
408    ///
409    /// Load a resolver catalog file from the current directory.
410    ///
411    /// If the parameter `look_in_parents` is `true` this will check parent directories.
412    ///
413    pub fn load_from_current(look_in_parents: bool) -> Option<Self> {
414        trace!("ModuleCatalog::load_from_current({look_in_parents})");
415        let cwd = std::env::current_dir().unwrap_or_else(|_| PathBuf::from("."));
416        Self::load_from(&cwd, look_in_parents)
417    }
418
419    ///
420    /// Load a resolver catalog file from the `path`.
421    ///
422    /// If the parameter `look_in_parents` is `true` this will check parent directories.
423    ///
424    pub fn load_from(path: &Path, look_in_parents: bool) -> Option<Self> {
425        trace!("ModuleCatalog::load_from({path:?}, {look_in_parents})");
426        if path.is_file() {
427            Self::load_from_file(path)
428        } else if path.is_dir() {
429            let file = path.join(SDML_CATALOG_FILE_NAME);
430            if file.is_file() {
431                Self::load_from_file(&file)
432            } else if look_in_parents {
433                if let Some(parent_path) = path.parent() {
434                    Self::load_from(parent_path, look_in_parents)
435                } else {
436                    warn!("No catalog file found in file-system parent path");
437                    None
438                }
439            } else {
440                warn!("No catalog found in provided directory");
441                None
442            }
443        } else {
444            warn!("The provided path was not a file or directory");
445            None
446        }
447    }
448
449    ///
450    /// Load from the `file` path, this has been found by one of the methods above and so it should
451    /// exist.
452    ///
453    fn load_from_file(file: &Path) -> Option<Self> {
454        trace!("ModuleCatalog::load_from_file({file:?})");
455        match std::fs::read_to_string(file) {
456            Ok(source) => match serde_json::from_str::<ModuleCatalog>(&source) {
457                Ok(mut catalog) => {
458                    catalog.loaded_from = file.parent().unwrap().to_path_buf();
459                    info!("Loaded catalog, file: {file:?}");
460                    Some(catalog)
461                }
462                Err(e) => {
463                    error!("Error parsing catalog, file: {file:?}, error: {e}");
464                    None
465                }
466            },
467            Err(e) => {
468                error!("Error reading catalog, file: {file:?}, error: {e}");
469                None
470            }
471        }
472    }
473
474    // --------------------------------------------------------------------------------------------
475
476    pub fn base(&self) -> &Url {
477        &self.base
478    }
479
480    pub fn set_base(&mut self, base: Url) {
481        self.base = base;
482    }
483
484    // --------------------------------------------------------------------------------------------
485
486    pub fn loaded_from(&self) -> &PathBuf {
487        &self.loaded_from
488    }
489
490    // --------------------------------------------------------------------------------------------
491
492    pub fn has_entries(&self) -> bool {
493        !self.entries.is_empty()
494    }
495
496    pub fn get_entry(&self, key: &String) -> Option<&CatalogEntry> {
497        self.entries.get(key)
498    }
499
500    pub fn entries_contains_key(&self, key: &String) -> bool {
501        self.entries.contains_key(key)
502    }
503
504    pub fn entries(&self) -> impl Iterator<Item = (&String, &CatalogEntry)> {
505        self.entries.iter()
506    }
507
508    pub fn entry_keys(&self) -> impl Iterator<Item = &String> {
509        self.entries.keys()
510    }
511
512    pub fn entry_values(&self) -> impl Iterator<Item = &CatalogEntry> {
513        self.entries.values()
514    }
515
516    // --------------------------------------------------------------------------------------------
517
518    pub fn groups(&self) -> impl Iterator<Item = (&String, &Group)> {
519        self.entries()
520            .filter_map(|(k, e)| e.as_group().map(|group| (k, group)))
521    }
522
523    // --------------------------------------------------------------------------------------------
524
525    pub fn items(&self) -> impl Iterator<Item = (&String, &Item)> {
526        self.entries()
527            .filter_map(|(k, e)| e.as_item().map(|item| (k, item)))
528    }
529
530    // --------------------------------------------------------------------------------------------
531
532    pub fn resolve_uri(&self, module: &String) -> Option<Url> {
533        if let Some(CatalogEntry::Item(item)) = self.get_entry(module) {
534            Some(self.base.join(item.relative_url().as_str()).unwrap())
535        } else {
536            self.groups()
537                .find(|(_, g)| g.entries_contains_key(module))
538                .map(|(_, g)| g.resolve_uri(&self.base, module))
539                .unwrap_or_default()
540        }
541    }
542
543    pub fn resolve_local_path(&self, module: &String) -> Option<PathBuf> {
544        if let Some(CatalogEntry::Item(item)) = self.get_entry(module) {
545            Some(self.loaded_from.join(item.relative_path()))
546        } else {
547            self.groups()
548                .find(|(_, g)| g.entries_contains_key(module))
549                .map(|(_, g)| g.resolve_local_path(&self.loaded_from, module))
550                .unwrap_or_default()
551        }
552    }
553}
554
555impl From<Group> for CatalogEntry {
556    fn from(value: Group) -> Self {
557        Self::Group(value)
558    }
559}
560
561impl From<Item> for CatalogEntry {
562    fn from(value: Item) -> Self {
563        Self::Item(value)
564    }
565}
566
567impl CatalogEntry {
568    pub fn is_group(&self) -> bool {
569        matches!(self, Self::Group(_))
570    }
571    pub fn as_group(&self) -> Option<&Group> {
572        match self {
573            Self::Group(v) => Some(v),
574            _ => None,
575        }
576    }
577
578    // --------------------------------------------------------------------------------------------
579
580    pub fn is_item(&self) -> bool {
581        matches!(self, Self::Item(_))
582    }
583    pub fn as_item(&self) -> Option<&Item> {
584        match self {
585            Self::Item(v) => Some(v),
586            _ => None,
587        }
588    }
589}
590
591impl Group {
592    pub fn relative_path(&self) -> Option<&PathBuf> {
593        self.relative_path.as_ref()
594    }
595    pub fn set_relative_path(&mut self, relative_path: PathBuf) {
596        self.relative_path = Some(relative_path);
597    }
598    pub fn unset_relative_path(&mut self) {
599        self.relative_path = None;
600    }
601
602    // --------------------------------------------------------------------------------------------
603
604    pub fn relative_url(&self) -> Option<&String> {
605        self.relative_url.as_ref()
606    }
607    pub fn set_relative_url(&mut self, relative_url: String) {
608        self.relative_url = Some(relative_url);
609    }
610    pub fn unset_relative_url(&mut self) {
611        self.relative_url = None;
612    }
613
614    // --------------------------------------------------------------------------------------------
615
616    pub fn has_entries(&self) -> bool {
617        !self.entries.is_empty()
618    }
619
620    pub fn get_entry(&self, key: &String) -> Option<&Item> {
621        self.entries.get(key)
622    }
623
624    pub fn entries_contains_key(&self, key: &String) -> bool {
625        self.entries.contains_key(key)
626    }
627
628    pub fn entries(&self) -> impl Iterator<Item = (&String, &Item)> {
629        self.entries.iter()
630    }
631
632    pub fn entry_keys(&self) -> impl Iterator<Item = &String> {
633        self.entries.keys()
634    }
635
636    pub fn entry_values(&self) -> impl Iterator<Item = &Item> {
637        self.entries.values()
638    }
639
640    // --------------------------------------------------------------------------------------------
641
642    pub fn resolve_uri(&self, base: &Url, module: &String) -> Option<Url> {
643        let base = if let Some(relative_url) = &self.relative_url {
644            base.join(relative_url.as_str()).unwrap()
645        } else {
646            base.clone()
647        };
648        self.get_entry(module)
649            .map(|item| base.join(item.relative_url().as_str()).unwrap())
650    }
651
652    pub fn resolve_local_path(&self, base: &Path, module: &String) -> Option<PathBuf> {
653        let base = if let Some(group_base) = &self.relative_path {
654            base.join(group_base)
655        } else {
656            base.to_path_buf()
657        };
658        self.get_entry(module)
659            .map(|item| base.join(item.relative_url().as_str()))
660    }
661}
662
663impl Item {
664    pub fn relative_path(&self) -> &PathBuf {
665        &self.relative_path
666    }
667
668    pub fn set_relative_path(&mut self, relative_path: PathBuf) {
669        self.relative_path = relative_path;
670    }
671
672    // --------------------------------------------------------------------------------------------
673
674    pub fn relative_url(&self) -> &String {
675        &self.relative_url
676    }
677
678    pub fn set_relative_url(&mut self, relative_url: String) {
679        self.relative_url = relative_url;
680    }
681}
682
683// ------------------------------------------------------------------------------------------------
684// Unit Tests
685// ------------------------------------------------------------------------------------------------
686
#[cfg(test)]
mod tests {
    use super::*;

    /// A catalog written as JSON can be read back with the same base and entries.
    #[test]
    fn test_generate_catalog() {
        let catalog = ModuleCatalog {
            base: Url::parse("https://example.org/schema/").unwrap(),
            loaded_from: PathBuf::from("."),
            entries: vec![(
                String::from("rentals"),
                CatalogEntry::Item(Item {
                    relative_url: String::from("rentals/v1/"),
                    relative_path: PathBuf::from("examples/rentals.sdm"),
                }),
            )]
            .into_iter()
            .collect(),
        };
        let json = serde_json::to_string_pretty(&catalog).unwrap();
        println!("{}", json);

        let parsed: ModuleCatalog = serde_json::from_str(&json).unwrap();
        let key = String::from("rentals");
        assert_eq!(parsed.base(), catalog.base());
        assert_eq!(parsed.get_entry(&key).map(|e| e.is_item()), Some(true));
    }

    /// A catalog with a single top-level item parses and resolves that item.
    #[test]
    fn test_parse_catalog() {
        let catalog: ModuleCatalog = serde_json::from_str(
            r#"{
  "base": "https://example.org/rentals/",
  "entries": {
    "vehicle": {
      "item": {
        "relative_url": "vehicle#",
        "relative_path": "vehicle-v1.sdm"
      }
    }
  }
}"#,
        )
        .unwrap();

        let key = String::from("vehicle");
        let item = catalog
            .get_entry(&key)
            .and_then(|entry| entry.as_item())
            .expect("the `vehicle` entry should be an item");
        assert_eq!(item.relative_url().as_str(), "vehicle#");
        assert_eq!(item.relative_path(), &PathBuf::from("vehicle-v1.sdm"));

        // `loaded_from` is skipped by serde, so the local path is relative to an empty base.
        assert_eq!(
            catalog.resolve_local_path(&key),
            Some(PathBuf::from("vehicle-v1.sdm"))
        );
        let uri = catalog.resolve_uri(&key).expect("URI should resolve");
        assert_eq!(uri.path(), "/rentals/vehicle");
    }

    /// A catalog with a group parses, including the group's own relative URL and path.
    #[test]
    fn test_parse_catalog_with_group() {
        // The group key is `relative_url`; the previous `relative_name` was silently ignored
        // by serde, so the group's URL segment was never exercised.
        let catalog: ModuleCatalog = serde_json::from_str(
            r#"{
  "base": "https://example.org/rentals/",
  "entries": {
    "rentals": {
      "group": {
        "relative_url": "entities/",
        "relative_path": "/entities-v1",
        "entries": {
            "vehicle": {
              "relative_url": "vehicle#",
              "relative_path": "vehicle-v1.sdm"
          }
        }
      }
    }
  }
}"#,
        )
        .unwrap();

        let key = String::from("vehicle");
        let (_, group) = catalog
            .groups()
            .next()
            .expect("the catalog should contain one group");
        assert_eq!(group.relative_url().map(|s| s.as_str()), Some("entities/"));
        assert_eq!(group.relative_path(), Some(&PathBuf::from("/entities-v1")));
        assert!(group.entries_contains_key(&key));

        let uri = catalog.resolve_uri(&key).expect("URI should resolve");
        assert_eq!(uri.path(), "/rentals/entities/vehicle");
    }
}