// corpora-engine 0.1.0
//
// Bus orchestrator and IO edges (parser, fs walk, writer, git oracle) for corpora.
// Documentation
//! Plugin seam #0 — where records come from. [`SourceDriver`] adapts a [`Source`] onto
//! the bus: scan on `RunStarted`, re-discover on `Changed`.

use std::fs;
use std::path::{Path, PathBuf};
use std::sync::Arc;

use corpora_core::subscriber::ek;
use corpora_core::{Context, DocPath, Event, Interest, Record, Subscriber};

/// A producer of events for the bus.
///
/// [`SourceDriver`] calls [`Source::scan`] when it handles `Event::RunStarted`; the
/// implementation reports what it finds by emitting events through the supplied context.
pub trait Source {
    /// Performs one scan, emitting the resulting events on `cx`.
    ///
    /// The implementations in this file emit `Event::Discovered` (one per path found) or
    /// `Event::Parsed` (one per pre-built record).
    fn scan(&mut self, cx: &mut Context<'_>);
}

/// Bus subscriber that owns a boxed [`Source`] and drives it from bus events.
pub struct SourceDriver {
    /// The wrapped source; scanned when a `RunStarted` event is handled.
    inner: Box<dyn Source>,
}

impl SourceDriver {
    /// Wraps `inner` so it can be driven through this type's [`Subscriber`] implementation.
    pub fn new(inner: Box<dyn Source>) -> Self {
        Self { inner }
    }
}

impl Subscriber for SourceDriver {
    /// Fixed name this subscriber reports.
    fn name(&self) -> &'static str {
        "source"
    }

    /// Declares interest in `RunStarted` and `Changed` events only.
    fn interest(&self) -> Interest {
        let kinds = ek::RUN_STARTED | ek::CHANGED;
        Interest::only(kinds)
    }

    /// Runs a full scan of the wrapped source on `RunStarted`, and re-announces a changed
    /// path as `Discovered` on `Changed`. Every other event is ignored.
    fn handle(&mut self, ev: &Event, cx: &mut Context<'_>) {
        if let Event::RunStarted { .. } = ev {
            // Scan emits Discovered/Parsed; the engine injects `Settled` once the resulting
            // cascade drains, so the graph builds only after every record has arrived.
            self.inner.scan(cx);
            return;
        }

        if let Event::Changed(changed) = ev {
            cx.emit(Event::Discovered(changed.clone()));
        }
    }
}

/// Walks `root` recursively for `*.md` files, emitting `Discovered` in sorted order.
#[derive(Debug, Clone)]
pub struct FsWalk {
    /// Directory the walk starts from.
    root: PathBuf,
}

impl FsWalk {
    /// Creates a walker rooted at `root`.
    ///
    /// Construction only stores the path; the filesystem is not touched here.
    pub fn new(root: impl Into<PathBuf>) -> Self {
        Self { root: root.into() }
    }
}

impl Source for FsWalk {
    /// Collects every `*.md` path under the root, sorts the paths, and emits one
    /// `Discovered` event per path.
    fn scan(&mut self, cx: &mut Context<'_>) {
        let mut found: Vec<String> = Vec::new();
        walk(self.root.as_path(), &mut found);

        // Directory listing order is not guaranteed; sorting keeps discovery deterministic.
        found.sort();

        found.into_iter().for_each(|path| {
            cx.emit(Event::Discovered(DocPath(path)));
        });
    }
}

/// Recursively collects the paths of `*.md` files under `dir` into `out`.
///
/// Best-effort: directories that cannot be read and paths that are not valid UTF-8 are
/// skipped silently. Entries are appended in directory-listing order, so callers that need
/// a stable order must sort `out` themselves.
///
/// Directory symlinks are followed, but a directory that is already being walked further up
/// the current chain is not entered again, so a symlink cycle cannot make the traversal
/// revisit the same tree over and over.
fn walk(dir: &Path, out: &mut Vec<String>) {
    let mut ancestors = Vec::new();
    walk_guarded(dir, out, &mut ancestors);
}

/// Recursive worker for [`walk`]. `ancestors` holds the canonical path of every directory
/// on the current recursion chain.
fn walk_guarded(dir: &Path, out: &mut Vec<String>, ancestors: &mut Vec<PathBuf>) {
    let Ok(entries) = fs::read_dir(dir) else {
        return;
    };

    // Canonicalization resolves symlinks, so a link back to an ancestor compares equal to
    // that ancestor. If it fails, the directory is still walked, just without tracking.
    let tracked = match fs::canonicalize(dir) {
        Ok(real) if ancestors.contains(&real) => return,
        Ok(real) => {
            ancestors.push(real);
            true
        }
        Err(_) => false,
    };

    for e in entries.flatten() {
        let path = e.path();
        if path.is_dir() {
            walk_guarded(&path, out, ancestors);
        } else if path.extension().and_then(|x| x.to_str()) == Some("md") {
            if let Some(s) = path.to_str() {
                out.push(s.to_string());
            }
        }
    }

    // Leaving this directory: siblings reached later may legitimately link to it.
    if tracked {
        ancestors.pop();
    }
}

/// In-memory source of pre-built records — emits `Parsed` directly, bypassing the parser.
/// For tests and embedding the engine without a filesystem.
pub struct VecSource {
    /// Records emitted, in order, on every scan; each emission clones the `Arc` handle.
    records: Vec<Arc<Record>>,
}

impl VecSource {
    pub fn new(records: Vec<Arc<Record>>) -> Self {
        VecSource { records }
    }
}

impl Source for VecSource {
    fn scan(&mut self, cx: &mut Context<'_>) {
        for r in &self.records {
            cx.emit(Event::Parsed(r.clone()));
        }
    }
}