camxes-rs 1.1.1

Lojban PEG parser with semantic analysis - integrated camxes parser and tersmu semantic engine
Documentation
//! Run the egglog e-graph engine over a lowered Lojban text and
//! extract (a) a canonical `JboProp` string and (b) a JSON serialisation of
//! the e-graph relation tables.
//!
//! # Usage
//! ```ignore
//! use camxes_rs::egglog_extract::run_egglog_analysis;
//!
//! let result = run_egglog_analysis(0, &texticules)?;
//! println!("{}", result.canonical_prop);
//! println!("{}", result.graph_json);
//! ```

use egglog_lib as egglog;

/// Output bundle returned by [`run_egglog_analysis`].
#[derive(Clone, Debug)]
pub struct EgglogResult {
    /// Canonical `JboProp` rendered in egglog term notation.
    ///
    /// Holds the text produced by egglog's `extract` command, or the
    /// fallback `"(Eet)"` when extraction fails or prints nothing.
    pub canonical_prop: String,
    /// The e-graph serialised as JSON text, or a small JSON object with an
    /// `"error"` key when serialisation fails.
    ///
    /// Intended for embedding under the `"egglog_graph"` key of the JSON
    /// output.
    pub graph_json: String,
}

/// Embedded schema (datatypes + constructor declarations).
///
/// The contents of `egglog_schema.egg` are baked into the binary at compile
/// time. This is the first program `run_egglog_analysis` feeds to a fresh
/// e-graph, before any facts are asserted.
const SCHEMA: &str = include_str!("egglog_schema.egg");

/// Embedded rewrite rules.
///
/// The contents of `egglog_rules.egg` are baked into the binary at compile
/// time. `run_egglog_analysis` appends a `(run 1000)` command to this text
/// and executes the combined program after the facts have been loaded.
const RULES: &str = include_str!("egglog_rules.egg");

/// Analyse a slice of `Texticule`s with egglog.
///
/// The pipeline is: load the embedded schema into a fresh e-graph, assert
/// the facts produced by `egglog_lower::lower_text`, run the embedded
/// rewrite rules followed by `(run 1000)`, extract a canonical term, and
/// finally serialise the e-graph to JSON.
///
/// `text_id` is an arbitrary i64 forwarded to the lowering step, where it
/// labels the `TextTexticule` relation rows; use 0 for single-text analyses.
///
/// # Errors
///
/// Returns a human-readable message when the schema, the lowered facts, or
/// the rules program is rejected by egglog. A facts failure also includes
/// the offending facts text in the message.
pub fn run_egglog_analysis(
    text_id: i64,
    texticules: &[crate::jbo_prop::Texticule],
) -> Result<EgglogResult, String> {
    let mut graph = egglog::EGraph::default();

    // Stage 1: schema (datatypes / constructors / relations).
    if let Err(err) = graph.parse_and_run_program(None, SCHEMA) {
        return Err(format!("egglog schema error: {err}"));
    }

    // Stage 2: facts lowered from the Lojban text. A blank facts program is
    // skipped rather than handed to egglog.
    let lowered = crate::egglog_lower::lower_text(text_id, texticules);
    let has_facts = !lowered.trim().is_empty();
    if has_facts {
        if let Err(err) = graph.parse_and_run_program(None, &lowered) {
            return Err(format!("egglog facts error: {err}\nfacts:\n{lowered}"));
        }
    }

    // Stage 3: rewrite rules, then run for at most 1000 iterations.
    let mut rules_program = String::from(RULES);
    rules_program.push_str("\n(run 1000)");
    if let Err(err) = graph.parse_and_run_program(None, &rules_program) {
        return Err(format!("egglog rules error: {err}"));
    }

    // Stage 4 must precede stage 5: extraction takes the graph mutably, and
    // the serialised snapshot is taken only afterwards.
    let canonical_prop = extract_canonical_prop(&mut graph, text_id);

    // Stage 5: JSON snapshot of the e-graph.
    let graph_json = serialize_egraph(&graph);

    Ok(EgglogResult {
        canonical_prop,
        graph_json,
    })
}

/// Extract a canonical term string using egglog's `extract` command.
///
/// Runs the fixed program `(extract (TexticuleProp (Eet)))` against
/// `egraph` and returns the first output whose rendering is not blank,
/// with surrounding whitespace trimmed.
///
/// This is a best-effort lookup: if the program fails, or every output is
/// blank, the function returns `"(Eet)"` instead of reporting an error.
///
/// `text_id` is currently unused. A more precise implementation would query
/// the `TextTexticule` relation for the given text rather than extracting a
/// fixed term.
fn extract_canonical_prop(egraph: &mut egglog::EGraph, text_id: i64) -> String {
    /// Program sent to egglog; constant, so no formatting is needed.
    const EXTRACT_PROGRAM: &str = "(extract (TexticuleProp (Eet)))\n";
    /// Value returned when extraction fails or prints nothing.
    const FALLBACK: &str = "(Eet)";

    // Kept in the signature for the future relation-based lookup.
    let _ = text_id;

    match egraph.parse_and_run_program(None, EXTRACT_PROGRAM) {
        Ok(outputs) => {
            for out in &outputs {
                let rendered = out.to_string();
                let trimmed = rendered.trim();
                if !trimmed.is_empty() {
                    return trimmed.to_string();
                }
            }
            FALLBACK.to_string()
        }
        // Deliberate best-effort: an extraction error degrades to the fallback.
        Err(_) => FALLBACK.to_string(),
    }
}

/// Serialise the e-graph to a compact JSON string.
fn serialize_egraph(egraph: &egglog::EGraph) -> String {
    let config = egglog::SerializeConfig {
        max_functions: Some(256),
        max_calls_per_function: Some(1024),
        include_temporary_functions: false,
        root_eclasses: vec![],
    };
    let output = egraph.serialize(config);
    match serde_json::to_string(&output.egraph) {
        Ok(json) => json,
        Err(e) => format!("{{\"error\":\"serialization failed: {}\"}}", e),
    }
}

// ---------------------------------------------------------------------------
// Tests
// ---------------------------------------------------------------------------

#[cfg(test)]
mod tests {
    use super::*;

    /// Smoke test: analysing zero texticules must not produce an error.
    #[test]
    fn test_empty_text_runs_without_error() {
        // With no texticules the schema is still loaded and the rules
        // program still runs, so the whole pipeline is expected to return Ok.
        let outcome = run_egglog_analysis(0, &[]);
        assert!(outcome.is_ok(), "empty text failed: {:?}", outcome.err());
    }
}