cordance-core 0.1.1

Cordance core types, schemas, and ports. No I/O.
Documentation
//! Fenced editable regions: `<!-- cordance:begin <key> -->` ... `<!-- cordance:end <key> -->`.
//!
//! Doctrine: developer-experience.md — fast local loop. Text outside the
//! fences (including text between two separate fenced regions) is user-owned;
//! only content *inside* a fence is regenerated. This is what stops the
//! strict hash-fail UX the original spec proposed.

use serde::{Deserialize, Serialize};

/// Key and line span of a fence, without the region body.
///
/// NOTE(review): nothing in the visible part of this module constructs or
/// reads this type, so the field meanings below are inferred from the
/// identically named fields on [`FencedRegion`] — confirm against callers.
#[derive(Clone, Debug, Serialize, Deserialize, Eq, PartialEq)]
pub struct FenceMarker {
    /// Region key.
    pub key: String,
    /// Presumably the line of the begin marker (0-indexed) — TODO confirm.
    pub begin_line: usize,
    /// Presumably the line of the end marker (0-indexed) — TODO confirm.
    pub end_line: usize,
}

/// One parsed fenced region, as produced by [`find_regions`].
#[derive(Clone, Debug, Serialize, Deserialize, Eq, PartialEq)]
pub struct FencedRegion {
    /// Key named by the begin and end markers, with surrounding whitespace trimmed.
    pub key: String,
    /// Inclusive begin line of the begin marker (0-indexed).
    pub begin_line: usize,
    /// Inclusive end line of the end marker (0-indexed).
    pub end_line: usize,
    /// Body between markers (exclusive of marker lines), normalised to LF endings.
    pub body: String,
}

// Marker syntax: `<!-- cordance:begin <key> -->` / `<!-- cordance:end <key> -->`.
// The trailing space in the prefixes and the leading space in the suffix are
// significant: a (whitespace-trimmed) line only counts as a marker when both
// are present.

/// Opening text of a begin marker, up to and including the space before the key.
const BEGIN_PREFIX: &str = "<!-- cordance:begin ";
/// Opening text of an end marker, up to and including the space before the key.
const END_PREFIX: &str = "<!-- cordance:end ";
/// Closing text shared by both marker kinds, including the space after the key.
const MARKER_SUFFIX: &str = " -->";

/// Find all cordance fence regions in a string. Returns regions in source order.
///
/// A line is a marker when, after trimming surrounding whitespace, it has the
/// form `<!-- cordance:begin <key> -->` or `<!-- cordance:end <key> -->`. Keys
/// are trimmed before comparison. Line numbers are 0-indexed, and each body is
/// the lines strictly between the two markers joined with `\n`.
///
/// # Errors
///
/// - [`FenceError::Nested`] if a begin marker appears while a region is open.
/// - [`FenceError::UnmatchedEnd`] if an end marker appears with no open region.
/// - [`FenceError::KeyMismatch`] if an end marker names a different key than
///   the open begin marker.
/// - [`FenceError::UnmatchedBegin`] if the input ends while a region is open.
pub fn find_regions(input: &str) -> Result<Vec<FencedRegion>, FenceError> {
    // Split once up front. `str::lines` strips the terminator (`\n` or `\r\n`),
    // so joining slices of this vector with "\n" yields LF-normalised bodies
    // without re-scanning the input for every region.
    let lines: Vec<&str> = input.lines().collect();
    let mut regions = Vec::new();
    // Key and line index of the begin marker still waiting for its end marker.
    let mut open: Option<(&str, usize)> = None;

    for (i, line) in lines.iter().copied().enumerate() {
        let trimmed = line.trim();
        if let Some(rest) = trimmed.strip_prefix(BEGIN_PREFIX) {
            if let Some(key) = rest.strip_suffix(MARKER_SUFFIX) {
                if open.is_some() {
                    return Err(FenceError::Nested(i));
                }
                open = Some((key.trim(), i));
            }
        } else if let Some(rest) = trimmed.strip_prefix(END_PREFIX) {
            if let Some(key) = rest.strip_suffix(MARKER_SUFFIX) {
                let key = key.trim();
                let (open_key, begin) = open.take().ok_or(FenceError::UnmatchedEnd(i))?;
                if open_key != key {
                    return Err(FenceError::KeyMismatch {
                        begin_line: begin,
                        end_line: i,
                        begin_key: open_key.to_string(),
                        end_key: key.to_string(),
                    });
                }
                // The begin marker was seen on an earlier iteration, so
                // `begin < i` and the range below is always in bounds.
                let body = lines[begin + 1..i].join("\n");
                regions.push(FencedRegion {
                    key: key.to_string(),
                    begin_line: begin,
                    end_line: i,
                    body,
                });
            }
        }
    }

    if let Some((key, line)) = open {
        return Err(FenceError::UnmatchedBegin {
            line,
            key: key.to_string(),
        });
    }
    Ok(regions)
}

#[derive(Clone, Debug, thiserror::Error)]
pub enum FenceError {
    #[error("unmatched cordance:begin at line {line} (key='{key}')")]
    UnmatchedBegin { line: usize, key: String },
    #[error("unmatched cordance:end at line {0}")]
    UnmatchedEnd(usize),
    #[error("nested cordance:begin at line {0}")]
    Nested(usize),
    #[error(
        "cordance fence key mismatch: begin at {begin_line} ('{begin_key}') vs end at \
         {end_line} ('{end_key}')"
    )]
    KeyMismatch {
        begin_line: usize,
        end_line: usize,
        begin_key: String,
        end_key: String,
    },
}

/// Make a target-controlled string safe to interpolate into a fenced region.
///
/// The result is a single line that contains neither cordance marker opener,
/// so the value cannot forge a fence boundary in the rendered output.
///
/// This is the canonical defence against the "target controls a string that
/// flows through `replace_regions`" injection class (round-2 redteam #2 /
/// round-3 redteam #1). Every emitter that interpolates a target-controlled
/// value (e.g. an `[axiom].source` knob from `cordance.toml`, or a
/// `[doctrine]` pin commit) into a fenced region MUST pass that value
/// through this function first.
///
/// Two steps, in this order:
///
/// 1. Each of these codepoints becomes a single space:
///    - `\n` (LF) and `\r` (CR);
///    - `\u{2028}` (LINE SEPARATOR) and `\u{2029}` (PARAGRAPH SEPARATOR),
///      added after the round-3 bughunt. A bare U+2028 inside a TOML basic
///      string round-trips through serde and `read_to_string`.
///      NOTE(review): `str::lines`, which the fence parser uses, only splits
///      on `\n` / `\r\n`, so stripping these two looks like defence in depth
///      rather than a parser requirement — confirm.
/// 2. Each occurrence of `<!-- cordance:begin` and then of
///    `<!-- cordance:end` becomes `[redacted-fence-marker]`.
///
/// Flattening first means a marker opener that was split across a line break
/// is rejoined by the inserted space and still gets redacted.
///
/// The function is idempotent: applying it twice yields the same string.
#[must_use]
pub fn sanitise_fenced_value(s: &str) -> String {
    const PLACEHOLDER: &str = "[redacted-fence-marker]";

    let single_line: String = s
        .chars()
        .map(|c| match c {
            '\n' | '\r' | '\u{2028}' | '\u{2029}' => ' ',
            other => other,
        })
        .collect();

    ["<!-- cordance:begin", "<!-- cordance:end"]
        .iter()
        .fold(single_line, |acc, marker| acc.replace(*marker, PLACEHOLDER))
}

/// Swap the body of the region named `key` for `new_body`.
///
/// Convenience wrapper: forwards to [`replace_regions`] with a one-element
/// replacement list and returns the rewritten text as a new `String`.
#[must_use]
pub fn replace_region(input: &str, key: &str, new_body: &str) -> String {
    let single = [(key, new_body)];
    replace_regions(input, &single)
}

/// Replace multiple fenced regions in a single pass.
///
/// `replacements` is a list of `(key, new_body)` pairs. For every fenced
/// region whose key appears in the list, the lines between its markers are
/// dropped and `new_body` is emitted in their place (nothing is emitted when
/// `new_body` is empty). If a key is listed more than once, the first entry
/// wins. Regions not listed, and all text outside regions, are carried over
/// line by line.
///
/// Behaviour worth knowing:
///
/// - The input is split with `str::lines` and the output is joined with
///   `\n`, so `\r\n` endings come back as `\n` and a trailing newline on
///   `input` is not reproduced.
/// - No validation is performed. Inside an open region, every line other
///   than the end marker with the *same* key is treated as body — including
///   begin markers and end markers naming another key. If a replaced region
///   is never closed, everything after its begin marker is dropped. Run
///   [`find_regions`] first when the input is not known to be well-formed.
#[must_use]
pub fn replace_regions(input: &str, replacements: &[(&str, &str)]) -> String {
    // Every emitted segment is borrowed from `input` or `replacements`; the
    // only allocation for text is the final join.
    let mut output: Vec<&str> = Vec::new();
    // While inside a fence: the open region's key, and whether its body is
    // being replaced (decided once, when the begin marker is seen).
    let mut open: Option<(&str, bool)> = None;

    for line in input.lines() {
        let trimmed = line.trim();

        if let Some((open_key, replaced)) = open {
            let end_key = trimmed
                .strip_prefix(END_PREFIX)
                .and_then(|rest| rest.strip_suffix(MARKER_SUFFIX))
                .map(str::trim);
            if end_key == Some(open_key) {
                // Matching end marker: always kept, and closes the region.
                open = None;
                output.push(line);
            } else if !replaced {
                // Body line of a region that is being left alone.
                output.push(line);
            }
            continue;
        }

        // Outside a region: every line is kept, marker or not.
        output.push(line);

        let begin_key = trimmed
            .strip_prefix(BEGIN_PREFIX)
            .and_then(|rest| rest.strip_suffix(MARKER_SUFFIX))
            .map(str::trim);
        if let Some(key) = begin_key {
            let replacement = replacements.iter().find(|(k, _)| *k == key);
            if let Some((_, new_body)) = replacement {
                // The new body goes right after the begin marker; the old
                // body lines are skipped as they are encountered.
                if !new_body.is_empty() {
                    output.push(*new_body);
                }
            }
            open = Some((key, replacement.is_some()));
        }
    }

    output.join("\n")
}

/// Unit tests for fence parsing, region replacement and value sanitising.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn finds_simple_region() {
        let s = "intro\n<!-- cordance:begin authority -->\nold body\n<!-- cordance:end authority -->\noutro";
        let regions = find_regions(s).expect("parse");
        assert_eq!(regions.len(), 1);
        assert_eq!(regions[0].key, "authority");
        assert_eq!(regions[0].body, "old body");
    }

    #[test]
    fn replaces_region_body() {
        let s = "<!-- cordance:begin a -->\nold\n<!-- cordance:end a -->";
        let out = replace_region(s, "a", "new line 1\nnew line 2");
        assert!(out.contains("new line 1"));
        assert!(!out.contains("old"));
    }

    #[test]
    fn rejects_unmatched_begin() {
        let s = "<!-- cordance:begin x -->\nno end";
        assert!(find_regions(s).is_err());
    }

    /// An end marker with no open region is reported with its 0-indexed line.
    #[test]
    fn rejects_unmatched_end() {
        let s = "text\n<!-- cordance:end a -->";
        assert!(matches!(find_regions(s), Err(FenceError::UnmatchedEnd(1))));
    }

    /// A begin marker inside an open region is reported with its 0-indexed line.
    #[test]
    fn rejects_nested_begin() {
        let s = "<!-- cordance:begin a -->\n<!-- cordance:begin b -->\n<!-- cordance:end b -->";
        assert!(matches!(find_regions(s), Err(FenceError::Nested(1))));
    }

    #[test]
    fn rejects_key_mismatch() {
        let s = "<!-- cordance:begin a -->\nbody\n<!-- cordance:end b -->";
        assert!(matches!(
            find_regions(s),
            Err(FenceError::KeyMismatch { .. })
        ));
    }

    #[test]
    fn crlf_line_endings_parsed() {
        let s = "intro\r\n<!-- cordance:begin foo -->\r\nbody line\r\n<!-- cordance:end foo -->\r\noutro\r\n";
        let regions = find_regions(s).expect("parse crlf");
        assert_eq!(regions.len(), 1);
        assert_eq!(regions[0].key, "foo");
        assert!(
            !regions[0].body.contains('\r'),
            "body should not contain CR"
        );
    }

    #[test]
    fn replace_regions_multi_in_one_pass() {
        let s = "<!-- cordance:begin a -->\nold-a\n<!-- cordance:end a -->\n<!-- cordance:begin b -->\nold-b\n<!-- cordance:end b -->";
        let out = replace_regions(s, &[("a", "new-a"), ("b", "new-b")]);
        assert!(out.contains("new-a"));
        assert!(out.contains("new-b"));
        assert!(!out.contains("old-a"));
        assert!(!out.contains("old-b"));
    }

    #[test]
    fn replace_regions_only_updates_listed_keys() {
        let s = "<!-- cordance:begin a -->\nold-a\n<!-- cordance:end a -->\n<!-- cordance:begin b -->\nold-b\n<!-- cordance:end b -->\n<!-- cordance:begin c -->\nold-c\n<!-- cordance:end c -->";
        let out = replace_regions(s, &[("a", "new-a"), ("c", "new-c")]);
        assert!(out.contains("new-a"));
        assert!(out.contains("old-b"), "b should be unchanged");
        assert!(out.contains("new-c"));
    }

    #[test]
    fn replace_regions_preserves_unfenced_content_exactly() {
        let prefix = "prefix text unchanged\n";
        let suffix = "\nsuffix text unchanged";
        let s = format!("{prefix}<!-- cordance:begin x -->\nold\n<!-- cordance:end x -->{suffix}");
        let out = replace_regions(&s, &[("x", "new")]);
        assert!(out.starts_with(prefix));
        assert!(out.ends_with(suffix));
    }

    #[test]
    fn empty_body_between_fences() {
        let s = "<!-- cordance:begin empty -->\n<!-- cordance:end empty -->";
        let regions = find_regions(s).expect("parse");
        assert_eq!(regions.len(), 1);
        assert_eq!(regions[0].body, "");
    }

    #[test]
    fn adjacent_fences_different_keys() {
        let s = "<!-- cordance:begin x -->\nbody-x\n<!-- cordance:end x -->\n<!-- cordance:begin y -->\nbody-y\n<!-- cordance:end y -->";
        let regions = find_regions(s).expect("parse");
        assert_eq!(regions.len(), 2);
        assert_eq!(regions[0].key, "x");
        assert_eq!(regions[1].key, "y");
    }

    #[test]
    fn key_with_dashes_parsed() {
        let s = "<!-- cordance:begin hard-rules -->\ncontent\n<!-- cordance:end hard-rules -->";
        let regions = find_regions(s).expect("parse");
        assert_eq!(regions[0].key, "hard-rules");
    }

    #[test]
    fn unicode_body_preserved() {
        let s = "<!-- cordance:begin u -->\n🦀 rust ≥ 1.88\n<!-- cordance:end u -->";
        let regions = find_regions(s).expect("parse");
        assert!(regions[0].body.contains("🦀"));
        let out = replace_regions(s, &[]); // no-op replace
        assert!(out.contains("🦀"));
    }

    #[test]
    fn file_with_no_fences_returns_empty() {
        let s = "# Readme\n\nNo fences here.";
        let r = find_regions(s).expect("parse");
        assert!(r.is_empty());
    }

    #[test]
    fn replace_regions_no_matching_key_is_noop() {
        let s = "<!-- cordance:begin a -->\nbody\n<!-- cordance:end a -->";
        let out = replace_regions(s, &[("nonexistent", "ignored")]);
        assert!(out.contains("body"));
    }

    #[test]
    fn sanitise_strips_lf_and_cr() {
        let hostile = "v1\n<!-- cordance:end x -->\r\ninjected";
        let clean = sanitise_fenced_value(hostile);
        assert!(!clean.contains('\n'));
        assert!(!clean.contains('\r'));
        assert!(!clean.contains("<!-- cordance:end"));
        assert!(clean.contains("[redacted-fence-marker]"));
    }

    /// Round-3 bughunt: unicode LINE SEPARATOR (U+2028) and PARAGRAPH
    /// SEPARATOR (U+2029) were not stripped by the round-2 sanitiser. A TOML
    /// basic string carrying a raw U+2028 (`"\u{2028}"`) round-trips through
    /// serde and `read_to_string`, so a hostile `cordance.toml` could carry
    /// these codepoints into a fenced region.
    ///
    /// NOTE(review): `str::lines` itself only splits on `\n` and `\r\n`, so
    /// stripping these codepoints looks like defence in depth rather than a
    /// parser requirement — confirm.
    #[test]
    fn sanitise_strips_unicode_line_separators() {
        let hostile = "v1\u{2028}<!-- cordance:end x -->\u{2029}injected";
        let clean = sanitise_fenced_value(hostile);
        assert!(!clean.contains('\u{2028}'));
        assert!(!clean.contains('\u{2029}'));
        assert!(!clean.contains("<!-- cordance:end"));
        assert!(clean.contains("[redacted-fence-marker]"));
    }

    #[test]
    fn sanitise_strips_begin_marker() {
        let hostile = "..\\pai-axiom<!-- cordance:begin malicious -->payload";
        let clean = sanitise_fenced_value(hostile);
        assert!(!clean.contains("<!-- cordance:begin"));
        assert!(clean.contains("[redacted-fence-marker]"));
    }

    /// The sanitiser must be idempotent so repeated passes (e.g. defence in
    /// depth) do not silently re-encode the redacted placeholder.
    #[test]
    fn sanitise_is_idempotent() {
        let hostile = "x\n<!-- cordance:end y -->\n<!-- cordance:begin z -->";
        let once = sanitise_fenced_value(hostile);
        let twice = sanitise_fenced_value(&once);
        assert_eq!(once, twice);
    }

    /// A clean string with no fence markers and no line separators must pass
    /// through unchanged.
    #[test]
    fn sanitise_is_identity_on_clean_input() {
        let clean = "../pai-axiom";
        assert_eq!(sanitise_fenced_value(clean), clean);
    }
}