omne-cli 0.1.2

CLI for managing omne volumes: init, upgrade, and validate kernel and distro releases
Documentation
//! `omne validate` — check volume integrity and run the distro gate runner.
//!
//! Walks up from cwd to find `.omne/`, runs integrity checks (required
//! dirs, core manifest, image contents, MANIFEST.md fields, depth rule
//! scoped to `cfg/`/`log/`), and optionally invokes the gate runner with
//! graceful Python degradation (R15).

use std::path::{Component, Path};

use clap::Args as ClapArgs;
use path_clean::PathClean;
use walkdir::WalkDir;

use crate::error::CliError;
use crate::python;
use crate::volume;

/// Maximum nesting depth for user-authored content under `cfg/` and `log/`.
///
/// Depth is counted in path components relative to `.omne/`, so the leading
/// `cfg/` or `log/` component is included: `cfg/sub1/sub2` has depth 3 and
/// is allowed, while `cfg/sub1/sub2/sub3` has depth 4 and is reported.
/// Fixes the Python bug where MAX_DEPTH was 2 instead of 3 (R3).
const MAX_DEPTH: usize = 3;

/// Required top-level directories under `.omne/`.
const REQUIRED_DIRS: &[&str] = &["core", "image", "cfg", "log"];

/// Required directories under `.omne/image/`.
const REQUIRED_IMAGE_DIRS: &[&str] = &["agents", "skills", "hooks"];

/// Required files under `.omne/image/`.
const REQUIRED_IMAGE_FILES: &[&str] = &["context-map.md", "SYSTEM.md"];

/// Required fields in MANIFEST.md frontmatter. Extends the Python list
/// with `kernel-source` and `distro-source`. A field counts as present
/// when a frontmatter line starts with `<field>:`.
const REQUIRED_MANIFEST_FIELDS: &[&str] = &[
    "volume",
    "distro",
    "created",
    "kernel-source",
    "distro-source",
];

/// Arguments for `omne validate`.
///
/// There are no flags today; the empty struct is kept as an explicit type
/// so that adding flags later is a mechanical change.
#[derive(Debug, ClapArgs)]
pub struct Args {}

/// Entry point for `omne validate`: validates the volume that contains the
/// current working directory.
///
/// # Errors
///
/// Returns `CliError::Io` when the current directory cannot be determined;
/// otherwise forwards whatever `validate_at_root` returns.
pub fn run(_args: &Args) -> Result<(), CliError> {
    match std::env::current_dir() {
        Ok(cwd) => validate_at_root(&cwd),
        Err(e) => Err(CliError::Io(format!(
            "cannot determine current directory: {e}"
        ))),
    }
}

/// Test seam: validate a volume rooted at (or walked up from) `start`.
pub fn validate_at_root(start: &Path) -> Result<(), CliError> {
    let root = volume::find_omne_root(start).ok_or(CliError::NotAVolume)?;
    let omne = root.join(".omne");

    let mut issues = Vec::new();
    check_required_dirs(&omne, &mut issues);
    check_core(&omne, &mut issues);
    check_image(&omne, &mut issues);
    check_manifest(&omne, &mut issues);
    check_depth(&omne, &mut issues);
    check_gate_runner(&omne, &mut issues);

    if issues.is_empty() {
        eprintln!("\x1b[32mVolume is valid.\x1b[0m");
        Ok(())
    } else {
        Err(CliError::ValidationFailed { issues })
    }
}

/// Record an issue for every entry of `REQUIRED_DIRS` that is not an
/// existing directory directly under `.omne/`.
fn check_required_dirs(omne: &Path, issues: &mut Vec<String>) {
    let missing = REQUIRED_DIRS
        .iter()
        .filter(|&&name| !omne.join(name).is_dir())
        .map(|&name| format!("missing required directory: .omne/{name}/"));
    issues.extend(missing);
}

/// Record an issue when `.omne/core/` exists but holds no `manifest.json`
/// file.
///
/// A missing `core/` directory is not reported here; that case is already
/// covered by `check_required_dirs`.
fn check_core(omne: &Path, issues: &mut Vec<String>) {
    let core_dir = omne.join("core");
    let manifest_missing = core_dir.is_dir() && !core_dir.join("manifest.json").is_file();
    if manifest_missing {
        issues.push(String::from("missing kernel manifest: core/manifest.json"));
    }
}

/// Record an issue for every required subdirectory and file missing from
/// `.omne/image/`. Directories are checked before files.
fn check_image(omne: &Path, issues: &mut Vec<String>) {
    let image_root = omne.join("image");
    if !image_root.is_dir() {
        // A missing image/ is already reported by check_required_dirs.
        return;
    }

    let missing_dirs = REQUIRED_IMAGE_DIRS
        .iter()
        .filter(|&&name| !image_root.join(name).is_dir())
        .map(|&name| format!("missing required image directory: image/{name}/"));
    issues.extend(missing_dirs);

    let missing_files = REQUIRED_IMAGE_FILES
        .iter()
        .filter(|&&name| !image_root.join(name).is_file())
        .map(|&name| format!("missing required image file: image/{name}"));
    issues.extend(missing_files);
}

/// Validate `.omne/MANIFEST.md`: the file must exist, be readable, open
/// with a `---` fenced frontmatter block, and that block must contain
/// every field listed in `REQUIRED_MANIFEST_FIELDS`.
///
/// The first three failures are terminal (one issue, then return); missing
/// fields are each reported individually.
fn check_manifest(omne: &Path, issues: &mut Vec<String>) {
    let manifest_path = omne.join("MANIFEST.md");
    if !manifest_path.is_file() {
        issues.push(String::from("missing MANIFEST.md"));
        return;
    }

    let text = match std::fs::read_to_string(&manifest_path) {
        Err(e) => {
            issues.push(format!("cannot read MANIFEST.md: {e}"));
            return;
        }
        Ok(text) => text,
    };

    let frontmatter = match extract_frontmatter(&text) {
        Some(block) => block,
        None => {
            issues.push(String::from(
                "MANIFEST.md has no YAML frontmatter (---...---)",
            ));
            return;
        }
    };

    // A field is present when some frontmatter line begins with `field:`.
    // This is a plain textual scan (matching the Python implementation's
    // regex approach), not a YAML parse.
    let missing_fields = REQUIRED_MANIFEST_FIELDS.iter().filter(|&&field| {
        !frontmatter
            .lines()
            .any(|line| matches!(line.strip_prefix(field), Some(rest) if rest.starts_with(':')))
    });
    issues.extend(
        missing_fields.map(|&field| format!("MANIFEST.md missing required field: {field}")),
    );
}

/// Return the text enclosed by the leading `---` fence pair of `content`.
///
/// The very first line must be exactly `---`; the body runs until the next
/// line that is exactly `---`. Each body line is re-emitted followed by a
/// single `\n`. Returns `None` when `content` is empty, does not start
/// with a fence, or the fence is never closed.
fn extract_frontmatter(content: &str) -> Option<String> {
    const FENCE: &str = "---";

    let mut rows = content.lines();
    if rows.next() != Some(FENCE) {
        return None;
    }

    let mut yaml = String::new();
    loop {
        // Running out of lines before the closing fence means the block
        // is unterminated, so `?` yields `None`.
        match rows.next()? {
            FENCE => return Some(yaml),
            row => {
                yaml.push_str(row);
                yaml.push('\n');
            }
        }
    }
}

/// Report every directory under `cfg/` or `log/` that is nested deeper
/// than `MAX_DEPTH` components below `.omne/`.
///
/// Only those two subtrees are walked: they hold user-authored content,
/// whereas `core/` and `image/` contain distro/kernel release artifacts
/// that may legitimately nest deeper. Entries the walker cannot read are
/// skipped without being reported.
fn check_depth(omne: &Path, issues: &mut Vec<String>) {
    for top in ["cfg", "log"] {
        let scope_root = omne.join(top);
        if !scope_root.is_dir() {
            // A missing cfg/ or log/ is already reported by check_required_dirs.
            continue;
        }

        let nested_dirs = WalkDir::new(&scope_root)
            .min_depth(1)
            .into_iter()
            .filter_map(Result::ok)
            .filter(|entry| entry.file_type().is_dir());

        for dir in nested_dirs {
            // Depth is counted from .omne/, so the cfg/ or log/ component
            // itself contributes 1 (cfg/sub1/sub2/sub3 → 4).
            let Ok(rel) = dir.path().strip_prefix(omne) else {
                continue;
            };
            let depth = rel.components().count();
            if depth <= MAX_DEPTH {
                continue;
            }
            issues.push(format!(
                "depth violation ({depth} > {MAX_DEPTH}): .omne/{}",
                rel.display()
            ));
        }
    }
}

/// Read `gate_runner` from `core/manifest.json`, validate path safety, and
/// invoke it with the discovered Python interpreter.
///
/// Outcomes, in order of evaluation:
///
/// * `core/manifest.json` is not a file — skipped silently (the missing
///   manifest is reported by `check_core`).
/// * The manifest cannot be read or is not valid JSON — one issue.
/// * No string-valued `gate_runner` key — skipped silently.
/// * The path is absolute or escapes `image/` — one issue.
/// * No file at `image/<gate_runner>` — warning on stderr, no issue.
/// * No Python interpreter — warning on stderr, no issue (R15: graceful
///   degradation).
/// * The runner fails, times out, or cannot be spawned — one issue.
fn check_gate_runner(omne: &Path, issues: &mut Vec<String>) {
    let core_manifest = omne.join("core/manifest.json");
    if !core_manifest.is_file() {
        return; // no kernel manifest, skip
    }

    let content = match std::fs::read_to_string(&core_manifest) {
        Ok(c) => c,
        Err(e) => {
            // An existing but unreadable manifest (permissions, non-UTF-8
            // bytes) must not let the volume pass as valid; report it the
            // same way check_manifest reports an unreadable MANIFEST.md.
            issues.push(format!("cannot read core/manifest.json: {e}"));
            return;
        }
    };

    let data: serde_json::Value = match serde_json::from_str(&content) {
        Ok(d) => d,
        Err(_) => {
            issues.push("core/manifest.json is invalid JSON".to_string());
            return;
        }
    };

    let Some(gate_runner) = data.get("gate_runner").and_then(|v| v.as_str()) else {
        return; // no gate runner defined, skip
    };

    // Path traversal safety check: the runner must live under image/.
    let image_dir = omne.join("image");
    if !is_safe_gate_runner_path(gate_runner, &image_dir) {
        issues.push(format!("gate runner path escapes image/: {gate_runner}"));
        return;
    }

    let runner_path = image_dir.join(gate_runner);
    if !runner_path.is_file() {
        // Per man/gate-protocol.md: "If no file exists at the gate
        // runner path, the step is skipped with a warning."
        eprintln!("\x1b[33mwarning:\x1b[0m gate runner not found: image/{gate_runner} (skipping)");
        return;
    }

    // Find Python interpreter
    let Some(interpreter) = python::find_interpreter() else {
        // R15: graceful degradation — warn but don't add an issue
        eprintln!("\x1b[33m{}\x1b[0m", python::missing_python_warning());
        return;
    };

    // Run the gate runner
    if let Err(e) = python::run_gate_runner(&interpreter, &runner_path, &image_dir) {
        // Exhaustive on purpose: a new python::Error variant should force
        // a decision here at compile time.
        match e {
            python::Error::GateRunnerFailed {
                exit_code,
                stdout,
                stderr,
            } => {
                // Header line followed by the runner's stdout and then
                // stderr, each line indented by two spaces.
                let mut msg = format!("gate runner failed (exit {exit_code}):");
                for line in stdout.trim().lines().chain(stderr.trim().lines()) {
                    msg.push_str("\n  ");
                    msg.push_str(line);
                }
                issues.push(msg);
            }
            python::Error::GateRunnerTimedOut { elapsed_seconds } => {
                issues.push(format!(
                    "gate runner timed out after {elapsed_seconds} seconds"
                ));
            }
            python::Error::InterpreterInvocation(io_err) => {
                issues.push(format!("failed to invoke gate runner: {io_err}"));
            }
        }
    }
}

/// Decide whether `gate_runner` may be joined onto `image_dir` without
/// leaving it.
///
/// Returns `false` for absolute paths and for any path containing a `..`,
/// root, or prefix component. As a final defence, the joined path is
/// lexically cleaned and must still start with the cleaned `image_dir`.
/// No filesystem access is performed by the checks in this function.
///
/// NOTE(review): the original comment states this mirrors the component
/// filtering in `tarball::extract_safe`; that function is not visible
/// here — confirm the two stay in sync.
fn is_safe_gate_runner_path(gate_runner: &str, image_dir: &Path) -> bool {
    let candidate = Path::new(gate_runner);

    if candidate.is_absolute() {
        return false;
    }

    let has_escaping_component = candidate.components().any(|part| {
        matches!(
            part,
            Component::ParentDir | Component::RootDir | Component::Prefix(_)
        )
    });
    if has_escaping_component {
        return false;
    }

    // Belt and braces: the cleaned result must stay under image_dir.
    let joined = image_dir.join(candidate).clean();
    joined.starts_with(image_dir.clean())
}