use std::collections::HashMap;
use pulldown_cmark::{CodeBlockKind, Event, Options, Parser, Tag, TagEnd};
use serde::Serialize;
use toml::Value;
/// Fence text quoted in the "opening fence must be exactly" diagnostic.
const OPENING_FENCE: &str = "```ccd-memory";
/// Values accepted for the `type` field of a memory entry.
const ALLOWED_TYPES: &[&str] = &["rule", "constraint", "heuristic", "observation", "attempt"];
/// Values accepted for the `state` field of a memory entry.
const ALLOWED_STATES: &[&str] = &["active", "promotion_candidate", "disputed", "superseded"];
/// Values accepted for the `origin` field of a memory entry.
const ALLOWED_ORIGINS: &[&str] = &["manual", "radar", "agent"];
/// Values accepted for the optional `decay_class` field of a memory entry.
const ALLOWED_DECAY_CLASSES: &[&str] = &["permanent", "stable", "active"];
/// A single validated memory entry extracted from a `ccd-memory` block.
///
/// Optional fields that are `None` and list fields that are empty are omitted
/// from the serialized form.
#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
pub struct StructuredMemoryEntry {
/// Entry identifier; a non-empty string taken from the `id` field.
pub id: String,
/// Entry kind, read from and serialized as `type`; one of `ALLOWED_TYPES`.
#[serde(rename = "type")]
pub entry_type: String,
/// Lifecycle state; one of `ALLOWED_STATES`.
pub state: String,
/// Creation timestamp kept as the raw string from the block (not parsed here).
pub created_at: String,
/// Non-negative integer taken from the `last_touched_session` field.
pub last_touched_session: u64,
/// Origin of the entry; one of `ALLOWED_ORIGINS`.
pub origin: String,
/// Raw `superseded_at` string; only accepted when `state` is `superseded`.
#[serde(skip_serializing_if = "Option::is_none")]
pub superseded_at: Option<String>,
/// Optional decay class; one of `ALLOWED_DECAY_CLASSES` when present.
#[serde(skip_serializing_if = "Option::is_none")]
pub decay_class: Option<String>,
/// Optional raw `expires_at` string (not parsed here).
#[serde(skip_serializing_if = "Option::is_none")]
pub expires_at: Option<String>,
/// Optional list of non-empty tag strings; empty when the field is absent.
#[serde(skip_serializing_if = "Vec::is_empty")]
pub tags: Vec<String>,
/// Optional non-empty `source_ref` string.
#[serde(skip_serializing_if = "Option::is_none")]
pub source_ref: Option<String>,
/// Values of the `supersedes` field, which may be written as a single
/// string or as an array of strings in the block.
#[serde(skip_serializing_if = "Vec::is_empty")]
pub supersedes: Vec<String>,
/// Non-empty body text of the entry.
pub content: String,
}
/// Result of scanning one document with `parse_document`.
#[derive(Debug, Default)]
pub struct ParseReport {
/// Number of `ccd-memory` blocks that were handed to `parse_block`; blocks
/// reported as unterminated are not counted.
pub block_count: usize,
/// Entries from blocks that passed validation, in document order.
pub entries: Vec<StructuredMemoryEntry>,
/// Human-readable problems, each prefixed with `line N:`.
pub diagnostics: Vec<String>,
}
/// Combined, serializable view over the memory entries of up to five sources.
#[derive(Debug, Clone, Serialize)]
pub struct StructuredMemoryView {
/// `"invalid"`, `"loaded"` or `"none"`, as computed by
/// `inspect_sources_with_branch_and_clone`.
pub status: &'static str,
/// Valid entries parsed from the profile source.
pub profile_entries: Vec<StructuredMemoryEntry>,
/// Valid entries parsed from the repo source.
pub repo_entries: Vec<StructuredMemoryEntry>,
/// Valid entries parsed from the pod source.
// NOTE(review): `default` is a deserialization attribute and this struct only
// derives `Serialize`, so it likely has no effect here — confirm the intent.
#[serde(default)]
pub pod_entries: Vec<StructuredMemoryEntry>,
/// Valid entries parsed from the branch source.
#[serde(default)]
pub branch_entries: Vec<StructuredMemoryEntry>,
/// Valid entries parsed from the clone source.
#[serde(default)]
pub clone_entries: Vec<StructuredMemoryEntry>,
/// Diagnostics from all sources, each prefixed with its source label.
pub diagnostics: Vec<String>,
}
/// Outcome of parsing one `ccd-memory` block.
struct ParsedBlock {
/// 1-based line of the block's opening fence.
start_line: usize,
/// The block's `id` when it could be read, even if the block is otherwise
/// invalid; `parse_document` uses it for duplicate detection.
id: Option<String>,
/// The validated entry, or `None` when any diagnostic was raised.
entry: Option<StructuredMemoryEntry>,
}
/// Scans a Markdown document for fenced `ccd-memory` code blocks and parses each one.
///
/// Every fenced code block whose info string starts with `ccd-memory` is
/// considered. Its text is collected and passed to `parse_block` as TOML.
/// Problems are recorded in [`ParseReport::diagnostics`], prefixed with the
/// 1-based line of the opening fence:
///
/// * an info string other than exactly `ccd-memory`,
/// * a block without a closing fence (reported as unterminated and skipped),
/// * any validation error raised by `parse_block`,
/// * an `id` that was already used by an earlier block in the same document.
///
/// Entries from blocks that validated are returned in document order. A
/// duplicate id only adds a diagnostic; the entry itself is still returned.
pub fn parse_document(contents: &str) -> ParseReport {
    let mut report = ParseReport::default();
    let mut blocks = Vec::new();
    let mut in_ccd_memory = false;
    let mut fence_start_line = 0usize;
    // Byte offset just past the last piece of block content seen so far.
    let mut text_end_byte = 0usize;
    let mut payload = String::new();
    let parser = Parser::new_ext(contents, Options::empty());
    for (event, range) in parser.into_offset_iter() {
        match event {
            Event::Start(Tag::CodeBlock(CodeBlockKind::Fenced(ref info)))
                if info.as_ref().starts_with("ccd-memory") =>
            {
                fence_start_line = line_number_at(contents, range.start);
                if info.as_ref() != "ccd-memory" {
                    report.diagnostics.push(format!(
                        "line {fence_start_line}: opening fence must be exactly `{OPENING_FENCE}`"
                    ));
                }
                in_ccd_memory = true;
                payload.clear();
                // Begin the closing-fence search after the opening fence line.
                // Starting at `range.start` would let the opening fence's own
                // backticks satisfy the check for a block with no content, so
                // an empty unterminated block would be treated as terminated.
                text_end_byte = contents[range.start..range.end]
                    .find('\n')
                    .map_or(range.end, |newline| range.start + newline + 1);
            }
            Event::Text(text) if in_ccd_memory => {
                payload.push_str(text.as_ref());
                text_end_byte = range.end;
            }
            Event::End(TagEnd::CodeBlock) if in_ccd_memory => {
                in_ccd_memory = false;
                // Whatever follows the content inside the block's range is the
                // closing fence line, if there is one.
                let tail = &contents[text_end_byte..range.end];
                if !tail.contains("```") {
                    report.diagnostics.push(format!(
                        "line {fence_start_line}: unterminated `ccd-memory` block"
                    ));
                } else {
                    report.block_count += 1;
                    blocks.push(parse_block(
                        fence_start_line,
                        &payload,
                        &mut report.diagnostics,
                    ));
                }
            }
            _ => {}
        }
    }
    // Maps each id to the line of the first block that declared it.
    let mut first_seen = HashMap::<String, usize>::new();
    for block in blocks {
        if let Some(id) = &block.id {
            match first_seen.get(id) {
                Some(first_line) => report.diagnostics.push(format!(
                    "line {}: duplicate memory entry id `{}` (first seen at line {})",
                    block.start_line, id, first_line
                )),
                None => {
                    first_seen.insert(id.clone(), block.start_line);
                }
            }
        }
        if let Some(entry) = block.entry {
            report.entries.push(entry);
        }
    }
    report
}
/// Returns the 1-based line number that contains `byte_offset` in `contents`.
///
/// The result is one more than the number of `\n` bytes before the offset.
/// Panics if `byte_offset` is out of range or not on a character boundary.
fn line_number_at(contents: &str, byte_offset: usize) -> usize {
    let preceding = &contents[..byte_offset];
    let newlines_before = preceding.matches('\n').count();
    newlines_before + 1
}
pub fn inspect_sources_with_branch_and_clone(
profile_contents: Option<&str>,
repo_contents: Option<&str>,
pod_contents: Option<&str>,
branch_contents: Option<&str>,
clone_contents: Option<&str>,
) -> StructuredMemoryView {
let profile_report = profile_contents.map(parse_document).unwrap_or_default();
let repo_report = repo_contents.map(parse_document).unwrap_or_default();
let pod_report = pod_contents.map(parse_document).unwrap_or_default();
let branch_report = branch_contents.map(parse_document).unwrap_or_default();
let clone_report = clone_contents.map(parse_document).unwrap_or_default();
let mut diagnostics = profile_report
.diagnostics
.into_iter()
.map(|message| format!("profile: {message}"))
.collect::<Vec<_>>();
diagnostics.extend(
repo_report
.diagnostics
.into_iter()
.map(|message| format!("repo: {message}")),
);
diagnostics.extend(
pod_report
.diagnostics
.into_iter()
.map(|message| format!("pod: {message}")),
);
diagnostics.extend(
branch_report
.diagnostics
.into_iter()
.map(|message| format!("branch: {message}")),
);
diagnostics.extend(
clone_report
.diagnostics
.into_iter()
.map(|message| format!("clone: {message}")),
);
let status = if !diagnostics.is_empty() {
"invalid"
} else if !profile_report.entries.is_empty()
|| !repo_report.entries.is_empty()
|| !pod_report.entries.is_empty()
|| !branch_report.entries.is_empty()
|| !clone_report.entries.is_empty()
{
"loaded"
} else {
"none"
};
StructuredMemoryView {
status,
profile_entries: profile_report.entries,
repo_entries: repo_report.entries,
pod_entries: pod_report.entries,
branch_entries: branch_report.entries,
clone_entries: clone_report.entries,
diagnostics,
}
}
/// Parses and validates the TOML payload of a single `ccd-memory` block.
///
/// `start_line` is the 1-based line of the opening fence; it is stored in the
/// result and prefixes every message pushed onto `diagnostics`.
///
/// Required fields are `id`, `type`, `state`, `created_at`,
/// `last_touched_session`, `origin` and `content`. Optional fields are `tags`,
/// `source_ref`, `supersedes`, `superseded_at`, `decay_class` and
/// `expires_at`. All validation errors for the block are collected, not just
/// the first one.
///
/// The returned block carries an entry only when no error was found. The `id`
/// is returned whenever it could be read, even for an invalid block.
fn parse_block(start_line: usize, payload: &str, diagnostics: &mut Vec<String>) -> ParsedBlock {
    let raw = match toml::from_str::<Value>(payload) {
        Ok(value) => value,
        Err(error) => {
            diagnostics.push(format!(
                "line {start_line}: invalid TOML in `ccd-memory` block: {error}"
            ));
            return ParsedBlock {
                start_line,
                id: None,
                entry: None,
            };
        }
    };
    let Some(table) = raw.as_table() else {
        diagnostics.push(format!(
            "line {start_line}: `ccd-memory` block payload must be a TOML table"
        ));
        return ParsedBlock {
            start_line,
            id: None,
            entry: None,
        };
    };
    let mut errors = Vec::new();
    let id = required_non_empty_string(table, "id", &mut errors);
    let entry_type = required_non_empty_string(table, "type", &mut errors);
    let state = required_non_empty_string(table, "state", &mut errors);
    let created_at = required_non_empty_string(table, "created_at", &mut errors);
    let last_touched_session =
        required_non_negative_integer(table, "last_touched_session", &mut errors);
    let origin = required_non_empty_string(table, "origin", &mut errors);
    let content = required_non_empty_string(table, "content", &mut errors);
    // These values are `None` when the field is missing or malformed, in which
    // case the allowed-value check is skipped because an error already exists.
    validate_allowed_value("type", entry_type.as_deref(), ALLOWED_TYPES, &mut errors);
    validate_allowed_value("state", state.as_deref(), ALLOWED_STATES, &mut errors);
    validate_allowed_value("origin", origin.as_deref(), ALLOWED_ORIGINS, &mut errors);
    if let Some(value) = table.get("tags") {
        validate_string_array(value, "tags", &mut errors);
    }
    if let Some(value) = table.get("source_ref") {
        validate_non_empty_string_value(value, "source_ref", &mut errors);
    }
    if let Some(value) = table.get("supersedes") {
        validate_string_or_string_array(value, "supersedes", &mut errors);
    }
    if let Some(value) = table.get("superseded_at") {
        validate_non_empty_string_value(value, "superseded_at", &mut errors);
    }
    if let Some(value) = table.get("decay_class") {
        validate_non_empty_string_value(value, "decay_class", &mut errors);
    }
    if let Some(value) = table.get("expires_at") {
        validate_non_empty_string_value(value, "expires_at", &mut errors);
    }
    // A blank `decay_class` was already reported as "must be a non-empty
    // string" above. Skip the allowed-value check for it so it does not get a
    // second diagnostic, matching how `type`, `state` and `origin` behave.
    validate_allowed_value(
        "decay_class",
        table
            .get("decay_class")
            .and_then(Value::as_str)
            .filter(|text| !text.trim().is_empty()),
        ALLOWED_DECAY_CLASSES,
        &mut errors,
    );
    if table.get("superseded_at").is_some() && state.as_deref() != Some("superseded") {
        errors.push("field `superseded_at` is only valid when `state` is `superseded`".to_owned());
    }
    if !errors.is_empty() {
        diagnostics.extend(
            errors
                .into_iter()
                .map(|error| format!("line {start_line}: {error}")),
        );
        return ParsedBlock {
            start_line,
            id,
            entry: None,
        };
    }
    // With no errors recorded, every required helper above returned `Some`,
    // so the `expect` calls below cannot fail.
    ParsedBlock {
        start_line,
        id: id.clone(),
        entry: Some(StructuredMemoryEntry {
            id: id.expect("validated id"),
            entry_type: entry_type.expect("validated type"),
            state: state.expect("validated state"),
            created_at: created_at.expect("validated created_at"),
            last_touched_session: last_touched_session.expect("validated last_touched_session"),
            origin: origin.expect("validated origin"),
            superseded_at: optional_non_empty_string(table.get("superseded_at")),
            decay_class: optional_non_empty_string(table.get("decay_class")),
            expires_at: optional_non_empty_string(table.get("expires_at")),
            tags: optional_string_array(table.get("tags")),
            source_ref: optional_non_empty_string(table.get("source_ref")),
            supersedes: optional_string_or_string_array(table.get("supersedes")),
            content: content.expect("validated content"),
        }),
    }
}
/// Reads `field` from `table` as a required, non-blank string.
///
/// Returns a copy of the string on success. Otherwise pushes one message onto
/// `errors` (missing field, blank string, or wrong type) and returns `None`.
fn required_non_empty_string(
    table: &toml::value::Table,
    field: &str,
    errors: &mut Vec<String>,
) -> Option<String> {
    let problem = match table.get(field) {
        Some(Value::String(text)) if !text.trim().is_empty() => return Some(text.clone()),
        Some(Value::String(_)) => format!("field `{field}` must be a non-empty string"),
        Some(_) => format!("field `{field}` must be a string"),
        None => format!("missing required field `{field}`"),
    };
    errors.push(problem);
    None
}
/// Reads `field` from `table` as a required integer that is zero or greater.
///
/// Returns the value as `u64` on success. Otherwise pushes one message onto
/// `errors` (missing field, negative integer, or wrong type) and returns `None`.
fn required_non_negative_integer(
    table: &toml::value::Table,
    field: &str,
    errors: &mut Vec<String>,
) -> Option<u64> {
    let problem = match table.get(field) {
        None => format!("missing required field `{field}`"),
        Some(Value::Integer(number)) if number.is_negative() => {
            format!("field `{field}` must be a non-negative integer")
        }
        // The guard above has ruled out negatives, so the cast is lossless.
        Some(Value::Integer(number)) => return Some(*number as u64),
        Some(_) => format!("field `{field}` must be an integer"),
    };
    errors.push(problem);
    None
}
/// Checks that `value`, when present, is one of the `allowed` strings.
///
/// `None` is accepted silently. A value outside `allowed` pushes one message
/// onto `errors` naming the field, the offending value and every allowed
/// choice.
fn validate_allowed_value(
    field: &str,
    value: Option<&str>,
    allowed: &[&str],
    errors: &mut Vec<String>,
) {
    if let Some(candidate) = value {
        let is_permitted = allowed.iter().any(|choice| *choice == candidate);
        if !is_permitted {
            let mut choices = String::new();
            for (index, item) in allowed.iter().enumerate() {
                if index > 0 {
                    choices.push_str(", ");
                }
                choices.push('`');
                choices.push_str(item);
                choices.push('`');
            }
            errors.push(format!(
                "field `{field}` has unsupported value `{candidate}` (allowed: {choices})"
            ));
        }
    }
}
/// Checks that `value` is a string with non-whitespace content.
///
/// Pushes one message onto `errors` when the value is not a string or is
/// blank; otherwise does nothing.
fn validate_non_empty_string_value(value: &Value, field: &str, errors: &mut Vec<String>) {
    let Value::String(text) = value else {
        errors.push(format!("field `{field}` must be a string"));
        return;
    };
    if text.trim().is_empty() {
        errors.push(format!("field `{field}` must be a non-empty string"));
    }
}
/// Checks that `value` is an array whose elements are all non-blank strings.
///
/// An empty array is accepted. Pushes one message onto `errors` when the
/// value is not an array, or when any element is blank or not a string.
fn validate_string_array(value: &Value, field: &str, errors: &mut Vec<String>) {
    match value {
        Value::Array(items) => {
            let has_invalid_item = items.iter().any(|item| match item {
                Value::String(text) => text.trim().is_empty(),
                _ => true,
            });
            if has_invalid_item {
                errors.push(format!(
                    "field `{field}` must be an array of non-empty strings"
                ));
            }
        }
        _ => errors.push(format!("field `{field}` must be an array of strings")),
    }
}
/// Checks that `value` is a non-blank string or an array of non-blank strings.
///
/// A blank single string gets its own message. Any other invalid shape
/// (wrong type, or an array with a blank or non-string element) gets the
/// combined message. An empty array is accepted.
fn validate_string_or_string_array(value: &Value, field: &str, errors: &mut Vec<String>) {
    match value {
        Value::String(text) => {
            if text.trim().is_empty() {
                errors.push(format!("field `{field}` must be a non-empty string"));
            }
        }
        Value::Array(items)
            if items
                .iter()
                .all(|item| matches!(item, Value::String(text) if !text.trim().is_empty())) => {}
        _ => errors.push(format!(
            "field `{field}` must be a non-empty string or an array of non-empty strings"
        )),
    }
}
/// Returns a copy of `value` when it is a string with non-whitespace content.
///
/// Absent values, non-string values and blank strings all yield `None`.
fn optional_non_empty_string(value: Option<&Value>) -> Option<String> {
    value
        .and_then(Value::as_str)
        .filter(|text| !text.trim().is_empty())
        .map(str::to_owned)
}
/// Collects the non-blank string elements of `value` when it is an array.
///
/// Blank or non-string elements are dropped. An absent value or a non-array
/// value yields an empty vector.
fn optional_string_array(value: Option<&Value>) -> Vec<String> {
    let Some(Value::Array(items)) = value else {
        return Vec::new();
    };
    let mut collected = Vec::new();
    for item in items {
        if let Value::String(text) = item {
            if !text.trim().is_empty() {
                collected.push(text.clone());
            }
        }
    }
    collected
}
/// Normalizes a field that may be a single string or an array of strings.
///
/// A non-blank string becomes a one-element vector. An array keeps only its
/// non-blank string elements. Anything else, including a blank string or an
/// absent value, yields an empty vector.
fn optional_string_or_string_array(value: Option<&Value>) -> Vec<String> {
    match value {
        Some(Value::String(text)) => {
            if text.trim().is_empty() {
                Vec::new()
            } else {
                vec![text.clone()]
            }
        }
        Some(Value::Array(items)) => items
            .iter()
            .filter_map(Value::as_str)
            .filter(|text| !text.trim().is_empty())
            .map(str::to_owned)
            .collect(),
        _ => Vec::new(),
    }
}
// Unit tests for `parse_document` and `inspect_sources_with_branch_and_clone`.
// The Markdown fixtures are raw strings, so their exact line layout matters.
#[cfg(test)]
mod tests {
use super::{inspect_sources_with_branch_and_clone, parse_document};
// A valid block surrounded by narrative Markdown yields one entry and no diagnostics.
#[test]
fn parses_mixed_narrative_and_structured_entries() {
let report = parse_document(
r#"# Memory
Some narrative text.
```ccd-memory
id = "mem_01"
type = "rule"
state = "active"
created_at = "2026-03-09T10:15:00Z"
last_touched_session = 12
origin = "manual"
tags = ["rust", "lint"]
content = "Always run clippy before committing."
```
"#,
);
assert!(report.diagnostics.is_empty());
assert_eq!(report.block_count, 1);
assert_eq!(report.entries.len(), 1);
assert_eq!(report.entries[0].id, "mem_01");
assert_eq!(report.entries[0].tags, vec!["rust", "lint"]);
}
// The optional `decay_class` and `expires_at` fields are carried into the entry.
#[test]
fn parses_optional_lifecycle_metadata() {
let report = parse_document(
r#"```ccd-memory
id = "mem_01"
type = "rule"
state = "active"
created_at = "2026-03-09T10:15:00Z"
last_touched_session = 12
origin = "manual"
decay_class = "stable"
expires_at = "2026-06-01T00:00:00Z"
content = "Always run clippy before committing."
```
"#,
);
assert!(report.diagnostics.is_empty());
assert_eq!(report.entries[0].decay_class.as_deref(), Some("stable"));
assert_eq!(
report.entries[0].expires_at.as_deref(),
Some("2026-06-01T00:00:00Z")
);
}
// A missing required field and every unsupported enum value are all reported together.
#[test]
fn reports_schema_and_enum_errors() {
let report = parse_document(
r#"```ccd-memory
id = "mem_01"
type = "unknown"
state = "stale"
created_at = "2026-03-09T10:15:00Z"
last_touched_session = 12
origin = "human"
decay_class = "volatile"
```
"#,
);
assert!(report.entries.is_empty());
assert!(report
.diagnostics
.iter()
.any(|message| message.contains("missing required field `content`")));
assert!(report
.diagnostics
.iter()
.any(|message| message.contains("field `type` has unsupported value `unknown`")));
assert!(report
.diagnostics
.iter()
.any(|message| message.contains("field `state` has unsupported value `stale`")));
assert!(report
.diagnostics
.iter()
.any(|message| message.contains("field `origin` has unsupported value `human`")));
assert!(report.diagnostics.iter().any(
|message| message.contains("field `decay_class` has unsupported value `volatile`")
));
}
// A block without a closing fence produces exactly one "unterminated" diagnostic.
#[test]
fn reports_unterminated_blocks() {
let report = parse_document(
r#"```ccd-memory
id = "mem_01"
"#,
);
assert!(report.entries.is_empty());
assert_eq!(
report.diagnostics,
vec!["line 1: unterminated `ccd-memory` block".to_owned()]
);
}
// An empty document has no blocks, entries or diagnostics.
#[test]
fn empty_document_produces_no_blocks() {
let report = parse_document("");
assert_eq!(report.block_count, 0);
assert!(report.entries.is_empty());
assert!(report.diagnostics.is_empty());
}
// Markdown without any code block has no blocks, entries or diagnostics.
#[test]
fn narrative_only_document_produces_no_blocks() {
let report = parse_document("# Memory\n\nSome notes without any code blocks.\n");
assert_eq!(report.block_count, 0);
assert!(report.entries.is_empty());
assert!(report.diagnostics.is_empty());
}
// Fenced blocks with a different info string are not counted or parsed.
#[test]
fn non_ccd_memory_fence_is_ignored() {
let report = parse_document("```toml\nkey = \"value\"\n```\n");
assert_eq!(report.block_count, 0);
assert!(report.entries.is_empty());
}
// Extra text after `ccd-memory` in the info string triggers the exact-fence diagnostic.
#[test]
fn malformed_fence_info_warns() {
let report = parse_document(
r#"```ccd-memory extra-stuff
id = "mem_01"
type = "rule"
state = "active"
created_at = "2026-03-09"
last_touched_session = 1
origin = "manual"
content = "Test."
```
"#,
);
assert!(report
.diagnostics
.iter()
.any(|d| d.contains("opening fence must be exactly")));
}
// A payload that is not valid TOML is reported and yields no entry.
#[test]
fn invalid_toml_reports_parse_error() {
let report = parse_document("```ccd-memory\n{not valid toml\n```\n");
assert!(report.entries.is_empty());
assert!(report
.diagnostics
.iter()
.any(|d| d.contains("invalid TOML")));
}
// Two blocks sharing an id produce a duplicate-id diagnostic.
#[test]
fn duplicate_ids_reported() {
let doc = r#"```ccd-memory
id = "dup"
type = "rule"
state = "active"
created_at = "2026-03-09"
last_touched_session = 1
origin = "manual"
content = "First."
```
```ccd-memory
id = "dup"
type = "rule"
state = "active"
created_at = "2026-03-09"
last_touched_session = 2
origin = "manual"
content = "Second."
```
"#;
let report = parse_document(doc);
assert!(report
.diagnostics
.iter()
.any(|d| d.contains("duplicate memory entry id `dup`")));
}
// Several valid blocks are all parsed and returned in document order.
#[test]
fn multiple_valid_blocks_all_parsed() {
let doc = r#"```ccd-memory
id = "a"
type = "rule"
state = "active"
created_at = "2026-03-09"
last_touched_session = 1
origin = "manual"
content = "Alpha."
```
```ccd-memory
id = "b"
type = "heuristic"
state = "promotion_candidate"
created_at = "2026-03-09"
last_touched_session = 2
origin = "radar"
content = "Beta."
```
"#;
let report = parse_document(doc);
assert_eq!(report.block_count, 2);
assert_eq!(report.entries.len(), 2);
assert!(report.diagnostics.is_empty());
assert_eq!(report.entries[0].id, "a");
assert_eq!(report.entries[1].id, "b");
}
// An integer where a string is required is rejected with a type message.
#[test]
fn field_type_mismatch_integer_where_string_expected() {
let report = parse_document(
r#"```ccd-memory
id = 42
type = "rule"
state = "active"
created_at = "2026-03-09"
last_touched_session = 1
origin = "manual"
content = "Test."
```
"#,
);
assert!(report.entries.is_empty());
assert!(report
.diagnostics
.iter()
.any(|d| d.contains("field `id` must be a string")));
}
// A required string containing only whitespace is treated as empty.
#[test]
fn whitespace_only_string_rejected() {
let report = parse_document(
r#"```ccd-memory
id = " "
type = "rule"
state = "active"
created_at = "2026-03-09"
last_touched_session = 1
origin = "manual"
content = "Test."
```
"#,
);
assert!(report.entries.is_empty());
assert!(report
.diagnostics
.iter()
.any(|d| d.contains("field `id` must be a non-empty string")));
}
// A negative `last_touched_session` is rejected.
#[test]
fn negative_last_touched_session_rejected() {
let report = parse_document(
r#"```ccd-memory
id = "mem_01"
type = "rule"
state = "active"
created_at = "2026-03-09"
last_touched_session = -1
origin = "manual"
content = "Test."
```
"#,
);
assert!(report.entries.is_empty());
assert!(report.diagnostics.iter().any(|d| {
d.contains("field `last_touched_session` must be a non-negative integer")
}));
}
// `supersedes` written as a single string becomes a one-element list.
#[test]
fn supersedes_as_single_string() {
let report = parse_document(
r#"```ccd-memory
id = "mem_02"
type = "rule"
state = "active"
created_at = "2026-03-09"
last_touched_session = 1
origin = "manual"
supersedes = "mem_01"
content = "Replaces old rule."
```
"#,
);
assert!(report.diagnostics.is_empty());
assert_eq!(report.entries[0].supersedes, vec!["mem_01"]);
}
// A non-string element inside `tags` invalidates the block.
#[test]
fn tags_with_non_string_element_rejected() {
let report = parse_document(
r#"```ccd-memory
id = "mem_01"
type = "rule"
state = "active"
created_at = "2026-03-09"
last_touched_session = 1
origin = "manual"
tags = ["valid", 42]
content = "Test."
```
"#,
);
assert!(report.entries.is_empty());
assert!(report
.diagnostics
.iter()
.any(|d| d.contains("field `tags` must be an array of non-empty strings")));
}
// An empty `source_ref` string is reported.
#[test]
fn source_ref_must_be_non_empty_string() {
let report = parse_document(
r#"```ccd-memory
id = "mem_01"
type = "rule"
state = "active"
created_at = "2026-03-09"
last_touched_session = 1
origin = "manual"
source_ref = ""
content = "Test."
```
"#,
);
assert!(report
.diagnostics
.iter()
.any(|d| d.contains("field `source_ref` must be a non-empty string")));
}
// `superseded_at` on an entry whose state is not `superseded` is rejected.
#[test]
fn superseded_at_requires_superseded_state() {
let report = parse_document(
r#"```ccd-memory
id = "mem_01"
type = "rule"
state = "active"
created_at = "2026-03-09"
last_touched_session = 1
origin = "manual"
superseded_at = "2026-03-12T09:30:00Z"
content = "Test."
```
"#,
);
assert!(report.entries.is_empty());
assert!(report.diagnostics.iter().any(|d| {
d.contains("field `superseded_at` is only valid when `state` is `superseded`")
}));
}
// With every source absent the view is empty and its status is "none".
#[test]
fn inspect_sources_both_none_returns_none_status() {
let view = inspect_sources_with_branch_and_clone(None, None, None, None, None);
assert_eq!(view.status, "none");
assert!(view.profile_entries.is_empty());
assert!(view.repo_entries.is_empty());
assert!(view.diagnostics.is_empty());
}
// A broken profile source makes the status "invalid" and labels the diagnostic.
#[test]
fn inspect_sources_invalid_reports_invalid_status() {
let view = inspect_sources_with_branch_and_clone(
Some("```ccd-memory\n{broken\n```\n"),
None,
None,
None,
None,
);
assert_eq!(view.status, "invalid");
assert!(view.diagnostics.iter().any(|d| d.starts_with("profile:")));
}
// Profile and repo entries are kept in separate lists with their optional fields intact.
#[test]
fn inspects_profile_and_repo_sources_separately() {
let view = inspect_sources_with_branch_and_clone(
Some(
r#"```ccd-memory
id = "mem_profile"
type = "rule"
state = "superseded"
created_at = "2026-03-09T10:15:00Z"
last_touched_session = 12
origin = "manual"
superseded_at = "2026-03-12T09:30:00Z"
source_ref = "handoff.completed_state[1]"
decay_class = "permanent"
expires_at = "2026-12-31T23:59:59Z"
content = "Profile rule."
```"#,
),
Some(
r#"```ccd-memory
id = "mem_repo"
type = "heuristic"
state = "superseded"
created_at = "2026-03-09T10:20:00Z"
last_touched_session = 13
origin = "radar"
superseded_at = "2026-03-11T07:00:00Z"
supersedes = ["mem_old"]
content = "Repo heuristic."
```"#,
),
None,
None,
None,
);
assert_eq!(view.status, "loaded");
assert_eq!(view.profile_entries.len(), 1);
assert_eq!(view.repo_entries.len(), 1);
assert_eq!(
view.profile_entries[0].source_ref.as_deref(),
Some("handoff.completed_state[1]")
);
assert_eq!(
view.profile_entries[0].superseded_at.as_deref(),
Some("2026-03-12T09:30:00Z")
);
assert_eq!(
view.profile_entries[0].decay_class.as_deref(),
Some("permanent")
);
assert_eq!(
view.profile_entries[0].expires_at.as_deref(),
Some("2026-12-31T23:59:59Z")
);
assert_eq!(view.repo_entries[0].supersedes, vec!["mem_old"]);
assert_eq!(
view.repo_entries[0].superseded_at.as_deref(),
Some("2026-03-11T07:00:00Z")
);
}
}