/// Filtering rules applied by [`extract_generic_markdown_lines`].
///
/// Every entry is matched ASCII case-insensitively. The struct only holds
/// shared slices, so it is cheap to copy and can be reused across calls.
#[derive(Debug, Clone, Copy)]
pub(crate) struct MarkdownLineExtraction<'a> {
    /// Keywords that, when contained anywhere in a heading line, exclude the
    /// section introduced by that heading.
    pub(crate) inactive_headings: &'a [&'a str],
    /// Prefixes that cause a bullet or numbered item to be skipped when the
    /// item text starts with one of them.
    pub(crate) inactive_item_prefixes: &'a [&'a str],
    /// Keywords that cause a free-form (non-list) line to be skipped when the
    /// line contains one of them.
    pub(crate) inline_skip_keywords: &'a [&'a str],
}
/// Extracts the active content lines from loosely structured Markdown.
///
/// Each line of `contents` is trimmed and then classified:
///
/// * Blank lines and the `_None._` placeholder are dropped.
/// * Fence delimiter lines (lines starting with three backticks) are dropped
///   and toggle the "inside a fenced block" state.
/// * Heading lines (lines starting with `#`) are dropped. A heading outside a
///   fenced block decides whether the lines that follow are included: the
///   section is excluded when the heading contains any of
///   `options.inactive_headings`.
/// * `- ` bullets and `N. ` numbered items contribute their trimmed item text
///   unless it starts with one of `options.inactive_item_prefixes`.
/// * Any other line is kept as-is unless it contains one of
///   `options.inline_skip_keywords`.
///
/// Inside a fenced block a leading `#` is code (for example a shell or Python
/// comment), not a Markdown heading. Such lines are still dropped, but they no
/// longer switch section inclusion on or off. An unterminated fence therefore
/// extends to the end of `contents`.
///
/// Returns the retained lines in document order.
pub(crate) fn extract_generic_markdown_lines(
    contents: &str,
    options: MarkdownLineExtraction<'_>,
) -> Vec<String> {
    let mut lines = Vec::new();
    let mut include_section = true;
    let mut in_fence = false;

    for line in contents.lines().map(str::trim) {
        if line.is_empty() {
            continue;
        }
        // Fence delimiters never carry content; checking them first keeps the
        // fence state accurate even while an excluded section is being skipped.
        if line.starts_with("```") {
            in_fence = !in_fence;
            continue;
        }
        if line.starts_with('#') {
            // Only real headings may change which section is active.
            if !in_fence {
                include_section =
                    !contains_any_keyword_case_insensitive(line, options.inactive_headings);
            }
            continue;
        }
        if !include_section || line == "_None._" {
            continue;
        }
        if let Some(item) = line.strip_prefix("- ").map(str::trim) {
            if !starts_with_any_prefix_case_insensitive(item, options.inactive_item_prefixes) {
                lines.push(item.to_owned());
            }
            continue;
        }
        if let Some(item) = parse_numbered_item(line) {
            if !starts_with_any_prefix_case_insensitive(&item, options.inactive_item_prefixes) {
                lines.push(item);
            }
            continue;
        }
        if !contains_any_keyword_case_insensitive(line, options.inline_skip_keywords) {
            lines.push(line.to_owned());
        }
    }
    lines
}
/// Removes every fenced block delimited by the given fences from `contents`.
///
/// A block opens on a line whose trimmed text starts with `opening_fence` and
/// closes on the next line whose trimmed text equals `closing_fence`. Both
/// delimiter lines and everything between them are dropped. A block that is
/// never closed swallows the rest of the input.
///
/// The surviving lines are returned unmodified (not trimmed) and joined with
/// `"\n"`, so the result carries no trailing newline.
pub(crate) fn strip_fenced_blocks(
    contents: &str,
    opening_fence: &str,
    closing_fence: &str,
) -> String {
    let mut inside = false;
    let kept: Vec<&str> = contents
        .lines()
        .filter(|raw| {
            let candidate = raw.trim();
            if inside {
                // Stay inside until the exact closing fence is seen; the
                // closing line itself is dropped as well.
                inside = candidate != closing_fence;
                return false;
            }
            // An opening fence starts a block and is itself dropped.
            inside = candidate.starts_with(opening_fence);
            !inside
        })
        .collect();
    kept.join("\n")
}
/// Returns `true` when `text`, ignoring surrounding whitespace, starts with
/// any of `prefixes`, compared ASCII case-insensitively.
///
/// Only ASCII letters are case-folded; all other bytes must match exactly.
/// An empty prefix matches any text, and an empty `prefixes` slice never
/// matches.
pub(crate) fn starts_with_any_prefix_case_insensitive(text: &str, prefixes: &[&str]) -> bool {
    // Compare on bytes so no lowercased copies of the text or the prefixes
    // have to be allocated; byte slicing also avoids char-boundary concerns.
    let bytes = text.trim().as_bytes();
    prefixes.iter().any(|prefix| {
        bytes.len() >= prefix.len()
            && bytes[..prefix.len()].eq_ignore_ascii_case(prefix.as_bytes())
    })
}
/// Parses a numbered list item of the form `<digits>. <text>`.
///
/// The line is split at the first `". "`. The part before it must be a
/// non-empty run of ASCII digits; otherwise `None` is returned. On success the
/// remainder is returned with surrounding whitespace trimmed (it may be
/// empty). The line is not trimmed first, so leading whitespace before the
/// number prevents a match.
pub(crate) fn parse_numbered_item(line: &str) -> Option<String> {
    line.split_once(". ")
        .filter(|(marker, _)| !marker.is_empty() && marker.bytes().all(|b| b.is_ascii_digit()))
        .map(|(_, body)| body.trim().to_owned())
}
/// Returns `true` when `text` contains any of `keywords` as a substring,
/// compared ASCII case-insensitively.
///
/// An empty keyword is contained in every text; an empty `keywords` slice
/// never matches.
fn contains_any_keyword_case_insensitive(text: &str, keywords: &[&str]) -> bool {
    let haystack = text.to_ascii_lowercase();
    for needle in keywords {
        if haystack.contains(needle.to_ascii_lowercase().as_str()) {
            return true;
        }
    }
    false
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Items under an inactive heading, items with an inactive prefix, and
    /// free-form lines containing a skip keyword must all be left out.
    #[test]
    fn extract_lines_respects_inactive_sections_and_prefixes() {
        let options = MarkdownLineExtraction {
            inactive_headings: &["archived"],
            inactive_item_prefixes: &["archived:", "stale:", "superseded:"],
            inline_skip_keywords: &["archived:", "stale:"],
        };
        let document = r#"
# Title
## Active Items
- Keep this
- Superseded: skip this
1. Keep numbered
stale: skip freeform
## Archived
- Skip archived section
"#;

        let extracted = extract_generic_markdown_lines(document, options);

        assert_eq!(extracted, vec!["Keep this", "Keep numbered"]);
    }

    /// The fence lines and everything between them are removed, while the
    /// surrounding lines survive.
    #[test]
    fn strip_fenced_blocks_removes_matching_block_contents() {
        let document = "keep\n```ccd-memory\ninside\n```\nafter";

        let remaining = strip_fenced_blocks(document, "```ccd-memory", "```");

        assert_eq!(remaining, "keep\nafter");
    }
}