typical 0.9.2

Algebraic data types for data interchange.
use {
    crate::{
        error::{listing, throw, Error},
        format::CodeStr,
        parser::parse,
        schema,
        tokenizer::tokenize,
    },
    std::{
        borrow::ToOwned,
        collections::{BTreeMap, HashSet},
        fs::read_to_string,
        io::{self, ErrorKind},
        path::PathBuf,
        path::{Component, Path},
    },
};

// Convert a path to a namespace. This function will panic if the path cannot be converted into a
// namespace (e.g., because it contains `..`).
fn path_to_namespace(path: &Path) -> schema::Namespace {
    let mut path = path.to_owned();
    path.set_extension("");

    schema::Namespace {
        components: path
            .components()
            .map(|component| match component {
                Component::Normal(component) => {
                    component.to_string_lossy().to_string().as_str().into()
                }
                _ => panic!(),
            })
            .collect(),
    }
}

// Load a schema and its transitive dependencies. The imports in the returned schemas are guaranteed
// to resolve.
#[allow(clippy::too_many_lines)]
#[allow(clippy::type_complexity)]
pub fn load_schemas(
    schema_path: &Path,
) -> Result<BTreeMap<schema::Namespace, (schema::Schema, PathBuf, String)>, Vec<Error>> {
    // The schema and all its transitive dependencies will end up here.
    let mut schemas = BTreeMap::new();

    // Any errors will end up here.
    let mut errors = vec![];

    // The base directory for the schema's dependencies is the directory containing the schema.
    let base_path = if let Some(base_path) = schema_path.parent() {
        base_path
    } else {
        errors.push(throw::<Error>(
            &format!(
                "{} is not a file.",
                schema_path.to_string_lossy().code_str(),
            ),
            None,
            None,
            None,
        ));

        return Err(errors);
    };

    // Canonicalize the path to the base directory. This will be used to calculate namespaces below.
    // Note that even with this we still need `base_path` from above, because canonicalization on
    // Windows adds a `\\?\` prefix to the path, which changes the meaning of `..` and thus prevents
    // us from joining it with other paths containing `..`. Note also that we can't simply
    // compute this by canonicalizing `base_path`, since `base_path` might have zero components,
    // which is considered invalid for canonicalization. So, instead, we canonicalize `schma_path`
    // and take the parent of the result.
    let canonical_base_path = match schema_path
        .canonicalize()
        .and_then(|canonical_schema_path| {
            canonical_schema_path
                .parent()
                .map(ToOwned::to_owned)
                .ok_or_else(|| io::Error::from(ErrorKind::Other))
        }) {
        Ok(canonical_base_path) => canonical_base_path,
        Err(error) => {
            errors.push(throw(
                &format!(
                    "{} is not a file.",
                    schema_path.to_string_lossy().code_str(),
                ),
                None,
                None,
                Some(error),
            ));

            return Err(errors);
        }
    };

    // Relative to the base directory, the path to the schema is the name of the schema file
    // [tag:based_schema_path_is_file_name].
    let based_schema_path = if let Some(based_schema_path) = schema_path.file_name() {
        AsRef::<Path>::as_ref(based_schema_path)
    } else {
        errors.push(throw::<Error>(
            &format!(
                "{} is not a file.",
                schema_path.to_string_lossy().code_str(),
            ),
            None,
            None,
            None,
        ));

        return Err(errors);
    };

    // Compute the namespace of the schema. This is safe due to
    // [ref:based_schema_path_is_file_name].
    let schema_namespace = path_to_namespace(based_schema_path);

    // Initialize the "frontier" with the given path. Paths in the frontier are relative to
    // `base_path` [tag:frontier_paths_based].
    let mut schemas_to_load = vec![(
        schema_namespace.clone(),
        based_schema_path.to_owned(),
        None as Option<(PathBuf, String)>,
    )];
    let mut visited_namespaces = HashSet::new();
    visited_namespaces.insert(schema_namespace);

    // Perform a depth-first traversal of the transitive dependencies.
    while let Some((namespace, path, origin)) = schemas_to_load.pop() {
        // Read the file.
        let contents = match read_to_string(&base_path.join(&path)) {
            Ok(contents) => contents,
            Err(error) => {
                let message = format!("Unable to load {}.", path.to_string_lossy().code_str());

                if let Some((origin_path, origin_listing)) = origin {
                    errors.push(throw(
                        &message,
                        Some(&origin_path),
                        Some(&origin_listing),
                        Some(error),
                    ));
                } else {
                    errors.push(throw(&message, None, None, Some(error)));
                }

                continue;
            }
        };

        // Tokenize the contents.
        let tokens = match tokenize(&path, &contents) {
            Ok(tokens) => tokens,
            Err(error) => {
                errors.extend_from_slice(&error);

                continue;
            }
        };

        // Parse the tokens.
        let mut schema = match parse(&path, &contents, &tokens) {
            Ok(schema) => schema,
            Err(error) => {
                errors.extend_from_slice(&error);

                continue;
            }
        };

        // Compute the base directory for this schema's dependencies. The `unwrap` is safe due to
        // [ref:frontier_paths_based].
        let parent_path = path.parent().unwrap();

        // Add the dependencies to the frontier.
        for import in schema.imports.values_mut() {
            // Compute the source listing for this import for error reporting.
            let origin_listing = listing(&contents, import.source_range);

            // Compute the import path.
            let non_canonical_import_path = base_path.join(parent_path.join(&import.path));

            // Canonicalize the import path.
            let canonical_import_path = match non_canonical_import_path.canonicalize() {
                Ok(canonical_import_path) => canonical_import_path,
                Err(error) => {
                    errors.push(throw(
                        &format!(
                            "Unable to load {}.",
                            non_canonical_import_path.to_string_lossy().code_str(),
                        ),
                        Some(&path),
                        Some(&origin_listing),
                        Some(error),
                    ));

                    continue;
                }
            };

            // Strip the base path from the schema path. Since this is computed from two canonical
            // paths, its guaranteed to contain only normal components
            // [tag:based_import_path_only_has_normal_components].
            let based_import_path = if let Ok(based_import_path) =
                canonical_import_path.strip_prefix(&canonical_base_path)
            {
                based_import_path.to_owned()
            } else {
                errors.push(throw::<Error>(
                    &format!(
                        "{} is not a descendant of {}, which is the base directory for this run.",
                        canonical_import_path.to_string_lossy().code_str(),
                        canonical_base_path.to_string_lossy().code_str(),
                    ),
                    Some(&path),
                    Some(&origin_listing),
                    None,
                ));

                continue;
            };

            // Populate the namespace of the import [tag:namespace_populated]. The
            // path-to-namespace conversion is safe due to
            // [ref:based_import_path_only_has_normal_components].
            let import_namespace = path_to_namespace(&based_import_path);
            import.namespace = Some(import_namespace.clone());

            // Visit this import if it hasn't been visited already.
            if !visited_namespaces.contains(&import_namespace) {
                visited_namespaces.insert(import_namespace.clone());
                schemas_to_load.push((
                    import_namespace,
                    based_import_path,
                    Some((path.clone(), origin_listing)),
                ));
            }
        }

        // Store the schema.
        if let Some((_, conflicting_schema_path, _)) =
            schemas.insert(namespace.clone(), (schema, path.clone(), contents))
        {
            errors.push(throw::<Error>(
                &format!(
                    "This file conflicts with {}, since both correspond to the same namespace {}.",
                    conflicting_schema_path.to_string_lossy().code_str(),
                    namespace.to_string().code_str(),
                ),
                Some(&path),
                None,
                None,
            ));
        }
    }

    // Return a success or report any errors.
    if errors.is_empty() {
        Ok(schemas)
    } else {
        Err(errors)
    }
}

#[cfg(test)]
mod tests {
    use {
        crate::{
            schema::Namespace,
            schema_loader::{load_schemas, path_to_namespace},
        },
        std::path::Path,
    };

    #[test]
    fn path_to_namespace_empty() {
        assert_eq!(
            path_to_namespace(Path::new("")),
            Namespace { components: vec![] },
        );
    }

    #[test]
    fn path_to_namespace_single() {
        assert_eq!(
            path_to_namespace(Path::new("foo")),
            Namespace {
                components: vec!["foo".into()],
            },
        );
    }

    #[test]
    fn path_to_namespace_double() {
        assert_eq!(
            path_to_namespace(Path::new("foo/bar")),
            Namespace {
                components: vec!["foo".into(), "bar".into()],
            },
        );
    }

    #[test]
    fn path_to_namespace_triple() {
        assert_eq!(
            path_to_namespace(Path::new("foo/bar/baz")),
            Namespace {
                components: vec!["foo".into(), "bar".into(), "baz".into()],
            },
        );
    }

    // This test doesn't work on Windows, for some reason.
    #[test]
    fn load_schemas_example() {
        load_schemas(Path::new("integration_tests/types/types.t")).unwrap();
    }
}