vyre-conform 0.1.0

Conformance suite for vyre backends — proves byte-identical output to CPU reference
Documentation
use std::path::Path;

use walkdir::WalkDir;

use super::document::TomlDocument;
use super::error::LoaderError;
use super::registry::TomlRegistry;
use super::schema_version::SCHEMA_VERSION;

/// Loads, validates, and registers all TOML rules from the given directory.
///
/// Ensures path traversal is prevented and logs warnings for any conflicting
/// overrides found across multiple TOML files.
#[inline]
pub fn load_rules<P: AsRef<Path>>(rules_dir: P) -> Result<TomlRegistry, LoaderError> {
    let mut registry = TomlRegistry::default();

    if !rules_dir.as_ref().exists() {
        return Ok(registry);
    }

    let canonical_dir = std::fs::canonicalize(rules_dir.as_ref()).map_err(|e| LoaderError::Io {
        path: rules_dir.as_ref().to_path_buf(),
        source: e,
    })?;

    let entries: Vec<_> = WalkDir::new(&canonical_dir)
        .into_iter()
        .map(|e| {
            e.map_err(|err| {
                let path = err
                    .path()
                    .map(|p| p.to_path_buf())
                    .unwrap_or_else(|| canonical_dir.clone());
                let msg = err.to_string();
                LoaderError::Io {
                    path,
                    source: err
                        .into_io_error()
                        .unwrap_or_else(|| std::io::Error::other(msg)),
                }
            })
        })
        .collect::<Result<_, _>>()?;

    let mut toml_paths: Vec<_> = entries
        .into_iter()
        .filter(|e| e.file_type().is_file())
        .map(|e| e.into_path())
        .filter(|p| p.extension().and_then(|s| s.to_str()) == Some("toml"))
        .collect();

    toml_paths.sort();

    for path in &toml_paths {
        let canonical_path = std::fs::canonicalize(path).map_err(|e| LoaderError::Io {
            path: path.to_path_buf(),
            source: e,
        })?;
        if !canonical_path.starts_with(&canonical_dir) {
            return Err(LoaderError::PathTraversal(canonical_path));
        }

        let metadata = std::fs::metadata(&canonical_path).map_err(|e| LoaderError::Io {
            path: canonical_path.clone(),
            source: e,
        })?;
        const MAX_TOML_SIZE: u64 = 1_048_576;
        let file_len = metadata.len();
        if file_len > MAX_TOML_SIZE {
            return Err(LoaderError::TomlTooLarge {
                path: canonical_path.clone(),
                bytes: file_len,
            });
        }

        let content = std::fs::read_to_string(&canonical_path).map_err(|e| LoaderError::Io {
            path: canonical_path.clone(),
            source: e,
        })?;
        let doc: TomlDocument = match toml::from_str(&content) {
            Ok(d) => d,
            Err(e) => return Err(LoaderError::Parse(e)),
        };

        doc.validate(&canonical_path)?;

        if doc.version != SCHEMA_VERSION {
            return Err(LoaderError::UnsupportedVersion(doc.version));
        }

        for op in doc.ops {
            if registry.ops.insert(op.name.clone(), op).is_some() {
                tracing::warn!("Conflicting override for op in {:?}", canonical_path);
            }
        }
        for w in doc.witnesses {
            registry.witnesses.push(w);
        }
        for d in doc.defendants {
            if registry.defendants.insert(d.name.clone(), d).is_some() {
                tracing::warn!("Conflicting override for defendant in {:?}", canonical_path);
            }
        }
        for law in doc.laws {
            if registry.laws.insert(law.name.clone(), law).is_some() {
                tracing::warn!("Conflicting override for law in {:?}", canonical_path);
            }
        }
        for rule in doc.independence {
            registry.independence.push(rule);
        }
    }

    Ok(registry)
}