use crate::span::Span;
/// Parsed representation of one frontmatter block.
///
/// Bundles the block's source spans, its top-level entries, and the
/// diagnostics recorded for it.
#[derive(Debug)]
pub struct FrontmatterBlock {
/// Source span of the block.
/// NOTE(review): presumably covers the whole block including its delimiters — confirm against the parser.
pub span: Span,
/// NOTE(review): presumably the span of the content between the delimiters — confirm against the parser.
#[allow(dead_code, reason = "used by tree construction ticket 06a")]
pub content_span: Span,
/// Top-level nodes of the block.
pub entries: Vec<FmNode>,
/// Diagnostics attached to this block.
pub diagnostics: Vec<FmDiagnostic>,
}
/// One node of the frontmatter tree: either a `key: value` mapping entry or
/// an item of a sequence.
#[derive(Debug)]
pub enum FmNode {
/// A keyed entry.
Mapping {
/// The entry's key, with its text and source span.
key: ScalarSpan,
/// The value associated with `key`.
value: FmValue,
/// Source span recorded for this entry.
span: Span,
},
/// An unkeyed entry belonging to a sequence.
SequenceItem {
/// The item's value.
value: FmValue,
/// Source span recorded for this item.
#[allow(dead_code, reason = "used by tree construction ticket 06a")]
span: Span,
},
}
/// The value side of a frontmatter node.
///
/// NOTE(review): variant names follow YAML terminology (block vs. flow
/// collections, block scalars); confirm exact coverage against the parser.
#[derive(Debug)]
pub enum FmValue {
/// A single scalar with its text and span.
Scalar(ScalarSpan),
/// A nested sequence, stored as child nodes.
Sequence(Vec<FmNode>),
/// A nested mapping, stored as child nodes.
Mapping(Vec<FmNode>),
/// A flow-style sequence whose items are all scalars.
FlowSequence {
/// Source span recorded for the flow sequence.
#[allow(dead_code, reason = "used by tree construction ticket 06a")]
span: Span,
/// The scalar items of the sequence.
items: Vec<ScalarSpan>,
},
/// A flow-style mapping whose keys and values are all scalars.
FlowMapping {
/// Source span recorded for the flow mapping.
#[allow(dead_code, reason = "used by tree construction ticket 06a")]
span: Span,
/// `(key, value)` scalar pairs of the mapping.
#[allow(dead_code, reason = "used by tree construction ticket 06a")]
entries: Vec<(ScalarSpan, ScalarSpan)>,
},
/// A block scalar; only its span is retained here, not its text.
BlockScalar {
/// Source span recorded for the block scalar.
#[allow(dead_code, reason = "used by tree construction ticket 06a")]
span: Span,
},
}
/// A scalar's owned text together with the source span it was read from.
#[derive(Debug)]
pub struct ScalarSpan {
/// Location of the scalar in the source.
pub span: Span,
/// Owned text of the scalar.
/// NOTE(review): presumably already unquoted/unescaped by the parser — confirm.
pub text: String,
}
/// Severity level carried by an [`FmDiagnostic`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum FmSeverity {
/// The diagnostic reports an error.
Error,
/// The diagnostic reports a warning.
#[allow(dead_code, reason = "used by structural diagnostics ticket 07")]
Warning,
}
/// A diagnostic message tied to a source span.
#[derive(Debug)]
pub struct FmDiagnostic {
/// Source location the diagnostic refers to.
pub span: Span,
/// Whether this is an error or a warning.
pub severity: FmSeverity,
/// Human-readable description of the problem.
pub message: String,
}
/// The UTF-8 encoding of the byte-order mark (U+FEFF).
pub const BOM: &[u8] = &[0xEF, 0xBB, 0xBF];

/// Removes a leading UTF-8 byte-order mark from `source`, if one is present.
///
/// Returns the remaining text together with the number of bytes that were
/// skipped: `BOM.len()` when a mark was removed, `0` otherwise. Only a single
/// leading mark is stripped.
pub fn strip_bom(source: &str) -> (&str, usize) {
    match source.as_bytes().strip_prefix(BOM) {
        Some(rest) => {
            // The BOM is one complete code point, so the cut lands on a
            // character boundary and slicing the `str` cannot panic.
            let skipped = source.len() - rest.len();
            (&source[skipped..], skipped)
        }
        None => (source, 0),
    }
}
/// Returns the length in bytes of the UTF-8 sequence announced by `lead`.
///
/// Any byte that does not announce a 2-, 3- or 4-byte sequence (ASCII,
/// continuation bytes, `0xF8..=0xFF`) is reported as length 1 so that callers
/// always make forward progress.
const fn utf8_seq_len(lead: u8) -> usize {
    match lead {
        0xF0..=0xF7 => 4,
        0xE0..=0xEF => 3,
        0xC0..=0xDF => 2,
        _ => 1,
    }
}

/// Decodes the character starting at `bytes[start]`, appends it to `text`,
/// and returns the index of the first byte after it.
///
/// ASCII bytes are pushed directly. For a multi-byte lead, the announced
/// sequence (clamped to the end of `bytes`) is validated. A valid sequence is
/// appended as-is. A malformed one appends a single U+FFFD and skips only the
/// invalid prefix, so well-formed bytes following a bad lead byte are kept
/// for the next call instead of being swallowed. A sequence cut off by the
/// end of `bytes` consumes the remaining bytes.
///
/// The returned index is always greater than `start`.
///
/// # Panics
///
/// Panics if `start >= bytes.len()`.
pub fn push_utf8_char(text: &mut String, bytes: &[u8], start: usize) -> usize {
    let lead = bytes[start];
    if lead.is_ascii() {
        text.push(char::from(lead));
        return start + 1;
    }
    let end = (start + utf8_seq_len(lead)).min(bytes.len());
    match std::str::from_utf8(&bytes[start..end]) {
        Ok(decoded) => {
            text.push_str(decoded);
            end
        }
        Err(err) => {
            text.push(char::REPLACEMENT_CHARACTER);
            // `error_len` is the length of the invalid prefix (at least 1);
            // `None` means the input ended mid-sequence, so nothing valid can
            // follow inside the inspected window.
            err.error_len().map_or(end, |invalid| start + invalid)
        }
    }
}
/// Collects the `backlinks` mapping of `block` as `predicate -> paths`.
///
/// Only the first top-level `backlinks` key is considered; if its value is
/// not a block mapping the result is empty. Each predicate's paths are taken
/// from a block sequence (scalar items only) or a flow sequence; any other
/// value shape yields an empty list. A predicate that appears more than once
/// keeps the paths of its last occurrence.
///
/// `source` is accepted but not consulted.
pub fn extract_backlinks(
    block: &FrontmatterBlock,
    source: &str,
) -> std::collections::HashMap<String, Vec<String>> {
    let _ = source;

    let mut by_predicate = std::collections::HashMap::new();

    let first_backlinks = block.entries.iter().find_map(|node| match node {
        FmNode::Mapping { key, value, .. } if key.text == "backlinks" => Some(value),
        _ => None,
    });
    let Some(FmValue::Mapping(predicates)) = first_backlinks else {
        return by_predicate;
    };

    for node in predicates {
        let FmNode::Mapping {
            key: predicate,
            value: targets,
            ..
        } = node
        else {
            continue;
        };

        let paths: Vec<String> = match targets {
            FmValue::Sequence(items) => items
                .iter()
                .filter_map(|item| match item {
                    FmNode::SequenceItem {
                        value: FmValue::Scalar(scalar),
                        ..
                    } => Some(scalar.text.clone()),
                    _ => None,
                })
                .collect(),
            FmValue::FlowSequence { items, .. } => {
                items.iter().map(|scalar| scalar.text.clone()).collect()
            }
            _ => Vec::new(),
        };

        by_predicate.insert(predicate.text.clone(), paths);
    }

    by_predicate
}
/// The lint families that an `exceptions` frontmatter mapping can address.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ExceptionLint {
    /// Exceptions for the stale-references lint.
    StaleReferences,
    /// Exceptions for the bare-paths lint.
    BarePaths,
}

impl ExceptionLint {
    /// Returns the snake_case identifier of this lint, the same spelling
    /// used for its namespace key under `exceptions`.
    #[must_use]
    pub const fn key(self) -> &'static str {
        match self {
            Self::BarePaths => "bare_paths",
            Self::StaleReferences => "stale_references",
        }
    }

    /// Returns the plural, space-separated noun phrase for this lint.
    #[must_use]
    pub const fn noun(self) -> &'static str {
        match self {
            Self::BarePaths => "bare paths",
            Self::StaleReferences => "stale references",
        }
    }
}
/// One literal exception: a reference key and the reason given for it.
#[derive(Debug, Clone)]
pub struct ExceptionEntry {
/// Text of the mapping key naming the excepted reference.
pub reference: String,
/// Text of the scalar value; empty when the value is not a scalar.
pub reason: String,
/// Source span of the key.
pub key_span: Span,
/// 1-based line number of the key's start offset.
pub line: usize,
}
/// An all-digits exception key, interpreted as a count rather than as a
/// literal reference.
#[derive(Debug, Clone)]
pub struct CountKey {
/// Number parsed from the key text; `usize::MAX` when the digits do not
/// fit in a `usize`.
pub expected: usize,
/// Text of the scalar value; empty when the value is not a scalar.
pub reason: String,
/// Source span of the key.
pub key_span: Span,
/// 1-based line number of the key's start offset.
pub line: usize,
/// The key text exactly as it was recorded, before numeric parsing.
pub raw: String,
}
/// Reports whether `key` has the shape of a count key: non-empty and made
/// up solely of ASCII digits.
#[must_use]
pub fn is_count_key(key: &str) -> bool {
    let raw = key.as_bytes();
    match raw {
        [] => false,
        _ => raw.iter().all(u8::is_ascii_digit),
    }
}
/// Exceptions gathered from a frontmatter block, grouped per lint.
///
/// The default value has empty entry lists and no count keys.
#[derive(Debug, Default)]
pub struct Exceptions {
/// Literal exception entries for the stale-references lint.
pub stale_references: Vec<ExceptionEntry>,
/// Literal exception entries for the bare-paths lint.
pub bare_paths: Vec<ExceptionEntry>,
/// Count key for the stale-references lint, if one was present.
pub stale_references_count: Option<CountKey>,
/// Count key for the bare-paths lint, if one was present.
pub bare_paths_count: Option<CountKey>,
}
impl Exceptions {
    /// Returns the literal exception entries recorded for `lint`.
    #[must_use]
    pub fn entries(&self, lint: ExceptionLint) -> &[ExceptionEntry] {
        let bucket = match lint {
            ExceptionLint::BarePaths => &self.bare_paths,
            ExceptionLint::StaleReferences => &self.stale_references,
        };
        bucket.as_slice()
    }

    /// Returns the count key recorded for `lint`, if any.
    #[must_use]
    pub fn count_key(&self, lint: ExceptionLint) -> Option<&CountKey> {
        let slot = match lint {
            ExceptionLint::BarePaths => &self.bare_paths_count,
            ExceptionLint::StaleReferences => &self.stale_references_count,
        };
        slot.as_ref()
    }
}
/// Reads the `exceptions` mapping of `block` into an [`Exceptions`] value.
///
/// Only the first top-level `exceptions` key is considered; if its value is
/// not a block mapping the result is empty. Beneath it, the namespaces
/// `stale_references` and `bare_paths` are recognised and every other
/// namespace, or a namespace whose value is not a block mapping, is skipped.
///
/// Within a namespace each `key: value` entry becomes either:
/// * the namespace's count key, when the key is all ASCII digits. The first
///   such key wins and later ones are ignored; digits that do not fit in a
///   `usize` are recorded as `usize::MAX`.
/// * a literal [`ExceptionEntry`] otherwise.
///
/// The reason is the value's scalar text, or an empty string when the value
/// is not a scalar. `source` is only used to translate key offsets into
/// 1-based line numbers.
#[must_use]
pub fn extract_exceptions(block: &FrontmatterBlock, source: &str) -> Exceptions {
    let mut collected = Exceptions::default();

    let first_exceptions = block.entries.iter().find_map(|node| match node {
        FmNode::Mapping { key, value, .. } if key.text == "exceptions" => Some(value),
        _ => None,
    });
    let Some(FmValue::Mapping(namespaces)) = first_exceptions else {
        return collected;
    };

    for namespace in namespaces {
        let FmNode::Mapping {
            key: ns_key,
            value: FmValue::Mapping(refs),
            ..
        } = namespace
        else {
            continue;
        };
        let lint = match ns_key.text.as_str() {
            "bare_paths" => ExceptionLint::BarePaths,
            "stale_references" => ExceptionLint::StaleReferences,
            _ => continue,
        };

        for reference in refs {
            let FmNode::Mapping {
                key: ref_key,
                value: ref_value,
                ..
            } = reference
            else {
                continue;
            };

            let reason = if let FmValue::Scalar(scalar) = ref_value {
                scalar.text.clone()
            } else {
                String::new()
            };
            let line = byte_offset_to_line(source, ref_key.span.start);

            if is_count_key(&ref_key.text) {
                let slot = match lint {
                    ExceptionLint::BarePaths => &mut collected.bare_paths_count,
                    ExceptionLint::StaleReferences => &mut collected.stale_references_count,
                };
                // First count key per namespace wins; later ones are dropped.
                if slot.is_none() {
                    *slot = Some(CountKey {
                        expected: ref_key.text.parse().unwrap_or(usize::MAX),
                        reason,
                        key_span: ref_key.span,
                        line,
                        raw: ref_key.text.clone(),
                    });
                }
            } else {
                let bucket = match lint {
                    ExceptionLint::BarePaths => &mut collected.bare_paths,
                    ExceptionLint::StaleReferences => &mut collected.stale_references,
                };
                bucket.push(ExceptionEntry {
                    reference: ref_key.text.clone(),
                    reason,
                    key_span: ref_key.span,
                    line,
                });
            }
        }
    }

    collected
}
/// Returns the 1-based line on which `predicate` is declared under
/// `backlinks`, or `1` when it cannot be located.
///
/// Every top-level `backlinks` key is examined in order. The search gives up
/// (returning `1`) as soon as one of them has a value that is not a block
/// mapping.
pub fn find_predicate_line(block: &FrontmatterBlock, predicate: &str, source: &str) -> usize {
    let backlinks_values = block.entries.iter().filter_map(|node| match node {
        FmNode::Mapping { key, value, .. } if key.text == "backlinks" => Some(value),
        _ => None,
    });

    for value in backlinks_values {
        let FmValue::Mapping(predicates) = value else {
            return 1;
        };
        let hit = predicates.iter().find_map(|node| match node {
            FmNode::Mapping { key, .. } if key.text == predicate => Some(key),
            _ => None,
        });
        if let Some(key) = hit {
            return byte_offset_to_line(source, key.span.start);
        }
    }

    1
}
/// Counts the line breaks in `bytes`.
///
/// `\n`, a lone `\r`, and the pair `\r\n` each count as exactly one break.
pub fn count_line_breaks(bytes: &[u8]) -> usize {
    let mut total = 0;
    let mut after_cr = false;
    for &byte in bytes {
        match byte {
            // Second half of a CRLF pair: already counted at the `\r`.
            b'\n' if after_cr => {}
            b'\n' | b'\r' => total += 1,
            _ => {}
        }
        after_cr = byte == b'\r';
    }
    total
}
/// Returns the number of lines in `source`.
///
/// An empty string has zero lines. A final line without a trailing line
/// break still counts as a line, and a trailing break does not open an
/// extra empty line.
pub fn line_count(source: &str) -> usize {
    let bytes = source.as_bytes();
    let terminated = count_line_breaks(bytes);
    match bytes.last() {
        // Empty input, or every line is already closed by a break.
        None | Some(b'\n' | b'\r') => terminated,
        // The last line has content but no terminator.
        Some(_) => terminated + 1,
    }
}
/// Converts a byte `offset` into `source` to a 1-based line number.
///
/// Offsets past the end are clamped to `source.len()`. The slice is taken
/// over bytes, so an offset inside a multi-byte character does not panic.
pub fn byte_offset_to_line(source: &str, offset: usize) -> usize {
    let bytes = source.as_bytes();
    let clamped = offset.min(bytes.len());
    1 + count_line_breaks(&bytes[..clamped])
}
// Unit tests for `extract_exceptions` and `is_count_key`, driven through
// `parse_frontmatter_block` on small inline frontmatter documents.
#[cfg(test)]
#[allow(clippy::expect_used, reason = "tests use expect for clarity")]
mod tests {
use super::{ExceptionLint, extract_exceptions, is_count_key};
use crate::yaml::parse_frontmatter_block;
// Both recognised namespaces are collected into their own buckets, with the
// key as the reference and the value as the reason.
#[test]
fn extract_exceptions_both_namespaces() {
let source = "---\nexceptions:\n stale_references:\n \"a.md\": \"reason a\"\n bare_paths:\n \"b.md\": \"reason b\"\n---\n";
let block = parse_frontmatter_block(source).expect("should parse");
let ex = extract_exceptions(&block, source);
assert_eq!(
ex.stale_references.len(),
1,
"one stale_references entry: {ex:?}"
);
assert_eq!(ex.bare_paths.len(), 1, "one bare_paths entry: {ex:?}");
assert_eq!(
ex.stale_references[0].reference, "a.md",
"stale key is the reference: {ex:?}"
);
assert_eq!(
ex.stale_references[0].reason, "reason a",
"stale reason is the value: {ex:?}"
);
assert_eq!(
ex.entries(ExceptionLint::BarePaths)[0].reference,
"b.md",
"entries() returns the bare_paths bucket: {ex:?}"
);
}
// An entry whose reason is the empty string is kept rather than dropped.
#[test]
fn extract_exceptions_empty_reason_retained() {
let source = "---\nexceptions:\n stale_references:\n \"a.md\": \"\"\n---\n";
let block = parse_frontmatter_block(source).expect("should parse");
let ex = extract_exceptions(&block, source);
assert_eq!(ex.stale_references.len(), 1, "one entry parsed: {ex:?}");
assert!(
ex.stale_references[0].reason.is_empty(),
"the empty reason is retained: {ex:?}"
);
}
// A namespace that is neither `stale_references` nor `bare_paths` is skipped.
#[test]
fn extract_exceptions_unknown_namespace_ignored() {
let source = "---\nexceptions:\n not_a_lint:\n \"a.md\": \"r\"\n---\n";
let block = parse_frontmatter_block(source).expect("should parse");
let ex = extract_exceptions(&block, source);
assert!(
ex.stale_references.is_empty() && ex.bare_paths.is_empty(),
"an unknown lint namespace yields no entries: {ex:?}"
);
}
// The 1-based line of the exception key is stored on the entry.
#[test]
fn extract_exceptions_records_key_line() {
let source = "---\nexceptions:\n stale_references:\n \"a.md\": \"r\"\n---\n";
let block = parse_frontmatter_block(source).expect("should parse");
let ex = extract_exceptions(&block, source);
assert_eq!(
ex.stale_references[0].line, 4,
"the key's line is recorded: {ex:?}"
);
}
// `is_count_key` accepts only non-empty, all-ASCII-digit strings.
#[test]
fn is_count_key_discriminates_by_shape() {
assert!(is_count_key("31"), "all-digits is a count key");
assert!(is_count_key("0"), "a single digit is all-digits");
assert!(!is_count_key("31.md"), "a `.md` name is a literal ref");
assert!(!is_count_key("a/31"), "a slashed path is a literal ref");
assert!(!is_count_key("3a"), "a trailing letter is a literal ref");
assert!(!is_count_key(""), "the empty string is not a count key");
assert!(
!is_count_key("#31"),
"a fragment-shaped key is a literal ref"
);
}
// An all-digits key lands in the count slot, not in the literal entries.
#[test]
fn extract_exceptions_count_key_parsed_into_sentinel_slot() {
let source =
"---\nexceptions:\n stale_references:\n \"31\": \"migration table\"\n---\n";
let block = parse_frontmatter_block(source).expect("should parse");
let ex = extract_exceptions(&block, source);
assert!(
ex.stale_references.is_empty(),
"the count key is not a literal entry: {ex:?}"
);
let count = ex
.count_key(ExceptionLint::StaleReferences)
.expect("count key present");
assert_eq!(count.expected, 31, "N is parsed from the key: {ex:?}");
assert_eq!(
count.reason, "migration table",
"reason is the value: {ex:?}"
);
assert_eq!(count.raw, "31", "raw key text is retained: {ex:?}");
}
// A literal key and a count key in the same namespace are both recorded,
// each in its own place.
#[test]
fn extract_exceptions_count_key_and_literal_compose() {
let source = "---\nexceptions:\n stale_references:\n \"a.md\": \"literal\"\n \"31\": \"count\"\n---\n";
let block = parse_frontmatter_block(source).expect("should parse");
let ex = extract_exceptions(&block, source);
assert_eq!(
ex.stale_references.len(),
1,
"only the literal key is an entry: {ex:?}"
);
assert_eq!(
ex.stale_references[0].reference, "a.md",
"the literal key is the path: {ex:?}"
);
assert!(
ex.count_key(ExceptionLint::StaleReferences).is_some(),
"the all-digits key is the sentinel: {ex:?}"
);
}
// With two count keys in one namespace, the first one is kept.
#[test]
fn extract_exceptions_count_key_first_wins() {
let source =
"---\nexceptions:\n bare_paths:\n \"3\": \"first\"\n \"7\": \"second\"\n---\n";
let block = parse_frontmatter_block(source).expect("should parse");
let ex = extract_exceptions(&block, source);
let count = ex
.count_key(ExceptionLint::BarePaths)
.expect("count key present");
assert_eq!(count.expected, 3, "the first sentinel wins: {ex:?}");
}
// Frontmatter without an `exceptions` key produces no entries.
#[test]
fn extract_exceptions_absent_block_is_empty() {
let source = "---\ntitle: test\n---\n";
let block = parse_frontmatter_block(source).expect("should parse");
let ex = extract_exceptions(&block, source);
assert!(
ex.stale_references.is_empty() && ex.bare_paths.is_empty(),
"no exceptions block yields empty: {ex:?}"
);
}
}