use serde::{Deserialize, Serialize};
/// Key and line span of a fence pair, without the body text.
///
/// NOTE(review): nothing in this file constructs or reads this type; the field
/// meanings below are presumed to mirror [`FencedRegion`] — confirm against callers.
#[derive(Clone, Debug, Serialize, Deserialize, Eq, PartialEq)]
pub struct FenceMarker {
/// Fence key (presumably the text between the marker prefix and ` -->`).
pub key: String,
/// Presumably the line index of the begin marker.
pub begin_line: usize,
/// Presumably the line index of the end marker.
pub end_line: usize,
}
/// One fenced region located by [`find_regions`].
#[derive(Clone, Debug, Serialize, Deserialize, Eq, PartialEq)]
pub struct FencedRegion {
/// Key taken from the markers, with surrounding whitespace trimmed.
pub key: String,
/// Zero-based index (into `input.lines()`) of the begin-marker line.
pub begin_line: usize,
/// Zero-based index (into `input.lines()`) of the end-marker line.
pub end_line: usize,
/// The lines strictly between the two markers, joined with `\n`.
/// Empty when the end marker directly follows the begin marker.
pub body: String,
}
// Marker grammar: a line is a marker when, after trimming surrounding
// whitespace, it reads `<prefix><key><MARKER_SUFFIX>`. The key itself is
// trimmed again by the parsers in this file.

/// Start of a begin-marker line. The trailing space is part of the match.
const BEGIN_PREFIX: &str = "<!-- cordance:begin ";
/// Start of an end-marker line. The trailing space is part of the match.
const END_PREFIX: &str = "<!-- cordance:end ";
/// End of either marker line. The leading space is part of the match.
const MARKER_SUFFIX: &str = " -->";
/// Scans `input` line by line and returns every fenced region, in document order.
///
/// A begin marker is a line that, once trimmed, reads `<!-- cordance:begin KEY -->`;
/// the matching end marker reads `<!-- cordance:end KEY -->`. A line that starts
/// like a marker but lacks the ` -->` suffix is treated as ordinary text.
///
/// All line numbers (in the returned regions and in errors) are zero-based
/// indices into `input.lines()`. A region's body is the lines strictly between
/// its markers joined with `\n`, so CRLF input yields an LF-only body.
///
/// # Errors
///
/// * [`FenceError::Nested`] — a begin marker appears while a region is still open.
/// * [`FenceError::UnmatchedEnd`] — an end marker appears with no region open.
/// * [`FenceError::KeyMismatch`] — an end marker's key differs from the open region's key.
/// * [`FenceError::UnmatchedBegin`] — `input` ends while a region is still open.
pub fn find_regions(input: &str) -> Result<Vec<FencedRegion>, FenceError> {
    // Returns the trimmed key when `line` is a complete `<prefix>KEY -->` marker.
    fn marker_key<'a>(line: &'a str, prefix: &str) -> Option<&'a str> {
        line.trim()
            .strip_prefix(prefix)?
            .strip_suffix(MARKER_SUFFIX)
            .map(str::trim)
    }

    // Split once up front so each region body is a cheap slice of this vector
    // instead of a fresh scan of `input` from its first line.
    let lines: Vec<&str> = input.lines().collect();
    let mut regions = Vec::new();
    // Key and line index of the begin marker that is currently open, if any.
    let mut open: Option<(&str, usize)> = None;

    for (i, line) in lines.iter().copied().enumerate() {
        if let Some(key) = marker_key(line, BEGIN_PREFIX) {
            if open.is_some() {
                return Err(FenceError::Nested(i));
            }
            open = Some((key, i));
        } else if let Some(key) = marker_key(line, END_PREFIX) {
            let (open_key, begin) = open.take().ok_or(FenceError::UnmatchedEnd(i))?;
            if open_key != key {
                return Err(FenceError::KeyMismatch {
                    begin_line: begin,
                    end_line: i,
                    begin_key: open_key.to_string(),
                    end_key: key.to_string(),
                });
            }
            regions.push(FencedRegion {
                key: key.to_string(),
                begin_line: begin,
                end_line: i,
                // `begin` was recorded on an earlier iteration, so `begin + 1 <= i`
                // and the range is always in bounds (possibly empty).
                body: lines[begin + 1..i].join("\n"),
            });
        }
    }

    if let Some((key, line)) = open {
        return Err(FenceError::UnmatchedBegin {
            line,
            key: key.to_string(),
        });
    }
    Ok(regions)
}
/// Ways the fence markers in a document can be malformed, as reported by
/// [`find_regions`].
///
/// Every line number is the zero-based `input.lines()` index at which
/// `find_regions` detected the problem.
#[derive(Clone, Debug, thiserror::Error)]
pub enum FenceError {
/// The input ended while the region `key`, opened at `line`, was still open.
#[error("unmatched cordance:begin at line {line} (key='{key}')")]
UnmatchedBegin { line: usize, key: String },
/// An end marker was found on this line while no region was open.
#[error("unmatched cordance:end at line {0}")]
UnmatchedEnd(usize),
/// A begin marker was found on this line while another region was still open.
#[error("nested cordance:begin at line {0}")]
Nested(usize),
/// The end marker at `end_line` carries a different key than the begin marker
/// at `begin_line`.
#[error(
"cordance fence key mismatch: begin at {begin_line} ('{begin_key}') vs end at \
{end_line} ('{end_key}')"
)]
KeyMismatch {
begin_line: usize,
end_line: usize,
begin_key: String,
end_key: String,
},
}
/// Flattens `s` to a single line and redacts anything that looks like the
/// start of a fence marker.
///
/// Two steps, in this order:
/// 1. every `\n`, `\r`, U+2028 and U+2029 is replaced by one space each
///    (so `\r\n` becomes two spaces);
/// 2. every occurrence of `<!-- cordance:begin`, then of `<!-- cordance:end`,
///    is replaced by `[redacted-fence-marker]`.
///
/// Input that contains none of these is returned unchanged.
#[must_use]
pub fn sanitise_fenced_value(s: &str) -> String {
    const LINE_BREAKS: [char; 4] = ['\n', '\r', '\u{2028}', '\u{2029}'];
    const MARKER_STARTS: [&str; 2] = ["<!-- cordance:begin", "<!-- cordance:end"];
    const REDACTED: &str = "[redacted-fence-marker]";

    let single_line: String = s
        .chars()
        .map(|c| if LINE_BREAKS.contains(&c) { ' ' } else { c })
        .collect();

    // Begin openers are redacted before end openers, one full pass each.
    MARKER_STARTS
        .iter()
        .fold(single_line, |text, marker| text.replace(*marker, REDACTED))
}
/// Replaces the body of the region named `key` with `new_body`.
///
/// Convenience form of [`replace_regions`] for a single `(key, new_body)`
/// pair; the result is whatever that function returns for the one-element list.
#[must_use]
pub fn replace_region(input: &str, key: &str, new_body: &str) -> String {
    let single = [(key, new_body)];
    replace_regions(input, &single)
}
/// Replaces the bodies of several fenced regions in a single pass over `input`.
///
/// `replacements` is a list of `(key, new_body)` pairs. For each region whose
/// key equals a listed key, the lines between its markers are swapped for
/// `new_body`; an empty `new_body` leaves the two markers adjacent. When a key
/// is listed more than once the first pair wins. Regions with unlisted keys and
/// all text outside regions are copied through.
///
/// Marker lines are always kept. While a region is open, only the end marker
/// carrying the same key closes it; any other marker line counts as body text.
///
/// A listed region that never reaches its matching end marker is not a complete
/// region, so its original lines are emitted untouched instead of being dropped.
///
/// The result is rebuilt by joining lines with `\n`: CRLF line endings become
/// LF, and a trailing newline on `input` is not reproduced.
#[must_use]
pub fn replace_regions(input: &str, replacements: &[(&str, &str)]) -> String {
    // Returns the trimmed key when `line` is a complete `<prefix>KEY -->` marker.
    fn marker_key<'a>(line: &'a str, prefix: &str) -> Option<&'a str> {
        line.trim()
            .strip_prefix(prefix)?
            .strip_suffix(MARKER_SUFFIX)
            .map(str::trim)
    }

    let mut output: Vec<&str> = Vec::new();
    // Key of the region currently open, plus its replacement body when listed.
    let mut open: Option<(&str, Option<&str>)> = None;
    // Original body lines of a region being replaced. They are held back until
    // the end marker proves the region is complete.
    let mut held: Vec<&str> = Vec::new();

    for line in input.lines() {
        if let Some((open_key, replacement)) = open {
            if marker_key(line, END_PREFIX) == Some(open_key) {
                if let Some(new_body) = replacement {
                    // The region closed properly: the old body can go.
                    held.clear();
                    if !new_body.is_empty() {
                        output.push(new_body);
                    }
                }
                output.push(line);
                open = None;
            } else if replacement.is_some() {
                held.push(line);
            } else {
                output.push(line);
            }
            continue;
        }

        if let Some(found_key) = marker_key(line, BEGIN_PREFIX) {
            // Look the key up once per region rather than once per body line.
            let replacement = replacements
                .iter()
                .find(|(k, _)| *k == found_key)
                .map(|(_, body)| *body);
            open = Some((found_key, replacement));
        }
        output.push(line);
    }

    // Non-empty only when a listed region never closed: give its text back.
    output.append(&mut held);
    output.join("\n")
}
// Unit tests for the fence parser (`find_regions`), the rewriter
// (`replace_region` / `replace_regions`) and `sanitise_fenced_value`.
#[cfg(test)]
mod tests {
use super::*;
// --- find_regions: happy path ---
#[test]
fn finds_simple_region() {
let s = "intro\n<!-- cordance:begin authority -->\nold body\n<!-- cordance:end authority -->\noutro";
let regions = find_regions(s).expect("parse");
assert_eq!(regions.len(), 1);
assert_eq!(regions[0].key, "authority");
assert_eq!(regions[0].body, "old body");
}
// A multi-line replacement body is inserted and the old body is gone.
#[test]
fn replaces_region_body() {
let s = "<!-- cordance:begin a -->\nold\n<!-- cordance:end a -->";
let out = replace_region(s, "a", "new line 1\nnew line 2");
assert!(out.contains("new line 1"));
assert!(!out.contains("old"));
}
// --- find_regions: malformed input ---
#[test]
fn rejects_unmatched_begin() {
let s = "<!-- cordance:begin x -->\nno end";
assert!(find_regions(s).is_err());
}
#[test]
fn rejects_key_mismatch() {
let s = "<!-- cordance:begin a -->\nbody\n<!-- cordance:end b -->";
assert!(matches!(
find_regions(s),
Err(FenceError::KeyMismatch { .. })
));
}
// CRLF input must parse, and the extracted body must not carry a stray `\r`.
#[test]
fn crlf_line_endings_parsed() {
let s = "intro\r\n<!-- cordance:begin foo -->\r\nbody line\r\n<!-- cordance:end foo -->\r\noutro\r\n";
let regions = find_regions(s).expect("parse crlf");
assert_eq!(regions.len(), 1);
assert_eq!(regions[0].key, "foo");
assert!(
!regions[0].body.contains('\r'),
"body should not contain CR"
);
}
// --- replace_regions ---
#[test]
fn replace_regions_multi_in_one_pass() {
let s = "<!-- cordance:begin a -->\nold-a\n<!-- cordance:end a -->\n<!-- cordance:begin b -->\nold-b\n<!-- cordance:end b -->";
let out = replace_regions(s, &[("a", "new-a"), ("b", "new-b")]);
assert!(out.contains("new-a"));
assert!(out.contains("new-b"));
assert!(!out.contains("old-a"));
assert!(!out.contains("old-b"));
}
// Regions whose key is not listed keep their original body.
#[test]
fn replace_regions_only_updates_listed_keys() {
let s = "<!-- cordance:begin a -->\nold-a\n<!-- cordance:end a -->\n<!-- cordance:begin b -->\nold-b\n<!-- cordance:end b -->\n<!-- cordance:begin c -->\nold-c\n<!-- cordance:end c -->";
let out = replace_regions(s, &[("a", "new-a"), ("c", "new-c")]);
assert!(out.contains("new-a"));
assert!(out.contains("old-b"), "b should be unchanged");
assert!(out.contains("new-c"));
}
// Text before and after the fenced region must survive the rewrite.
// Note: neither `prefix` nor `suffix` exercises a trailing newline on the input.
#[test]
fn replace_regions_preserves_unfenced_content_exactly() {
let prefix = "prefix text unchanged\n";
let suffix = "\nsuffix text unchanged";
let s = format!("{prefix}<!-- cordance:begin x -->\nold\n<!-- cordance:end x -->{suffix}");
let out = replace_regions(&s, &[("x", "new")]);
assert!(out.starts_with(prefix));
assert!(out.ends_with(suffix));
}
// --- find_regions: edge cases ---
#[test]
fn empty_body_between_fences() {
let s = "<!-- cordance:begin empty -->\n<!-- cordance:end empty -->";
let regions = find_regions(s).expect("parse");
assert_eq!(regions.len(), 1);
assert_eq!(regions[0].body, "");
}
#[test]
fn adjacent_fences_different_keys() {
let s = "<!-- cordance:begin x -->\nbody-x\n<!-- cordance:end x -->\n<!-- cordance:begin y -->\nbody-y\n<!-- cordance:end y -->";
let regions = find_regions(s).expect("parse");
assert_eq!(regions.len(), 2);
assert_eq!(regions[0].key, "x");
assert_eq!(regions[1].key, "y");
}
#[test]
fn key_with_dashes_parsed() {
let s = "<!-- cordance:begin hard-rules -->\ncontent\n<!-- cordance:end hard-rules -->";
let regions = find_regions(s).expect("parse");
assert_eq!(regions[0].key, "hard-rules");
}
// Non-ASCII body text survives both parsing and a no-op rewrite.
#[test]
fn unicode_body_preserved() {
let s = "<!-- cordance:begin u -->\n🦀 rust ≥ 1.88\n<!-- cordance:end u -->";
let regions = find_regions(s).expect("parse");
assert!(regions[0].body.contains("🦀"));
let out = replace_regions(s, &[]); assert!(out.contains("🦀"));
}
#[test]
fn file_with_no_fences_returns_empty() {
let s = "# Readme\n\nNo fences here.";
let r = find_regions(s).expect("parse");
assert!(r.is_empty());
}
#[test]
fn replace_regions_no_matching_key_is_noop() {
let s = "<!-- cordance:begin a -->\nbody\n<!-- cordance:end a -->";
let out = replace_regions(s, &[("nonexistent", "ignored")]);
assert!(out.contains("body"));
}
// --- sanitise_fenced_value ---
// A hostile value tries to close the fence early via an embedded newline
// followed by an end marker.
#[test]
fn sanitise_strips_lf_and_cr() {
let hostile = "v1\n<!-- cordance:end x -->\r\ninjected";
let clean = sanitise_fenced_value(hostile);
assert!(!clean.contains('\n'));
assert!(!clean.contains('\r'));
assert!(!clean.contains("<!-- cordance:end"));
assert!(clean.contains("[redacted-fence-marker]"));
}
// Same attack using U+2028 / U+2029 instead of ASCII line breaks.
#[test]
fn sanitise_strips_unicode_line_separators() {
let hostile = "v1\u{2028}<!-- cordance:end x -->\u{2029}injected";
let clean = sanitise_fenced_value(hostile);
assert!(!clean.contains('\u{2028}'));
assert!(!clean.contains('\u{2029}'));
assert!(!clean.contains("<!-- cordance:end"));
assert!(clean.contains("[redacted-fence-marker]"));
}
#[test]
fn sanitise_strips_begin_marker() {
let hostile = "..\\pai-axiom<!-- cordance:begin malicious -->payload";
let clean = sanitise_fenced_value(hostile);
assert!(!clean.contains("<!-- cordance:begin"));
assert!(clean.contains("[redacted-fence-marker]"));
}
// Sanitising an already-sanitised value must not change it further.
#[test]
fn sanitise_is_idempotent() {
let hostile = "x\n<!-- cordance:end y -->\n<!-- cordance:begin z -->";
let once = sanitise_fenced_value(hostile);
let twice = sanitise_fenced_value(&once);
assert_eq!(once, twice);
}
#[test]
fn sanitise_is_identity_on_clean_input() {
let clean = "../pai-axiom";
assert_eq!(sanitise_fenced_value(clean), clean);
}
}