use crate::lex::assembling::stages::{
ApplyTableConfig, AttachAnnotations, AttachRoot, NormalizeLabels,
};
use crate::lex::ast::elements::annotation::Annotation;
use crate::lex::ast::elements::content_item::ContentItem;
use crate::lex::ast::elements::label::Label;
use crate::lex::ast::elements::verbatim::Verbatim;
use crate::lex::ast::Document;
use crate::lex::transforms::stages::ParseInlines;
use crate::lex::transforms::standard::LEXING;
use crate::lex::transforms::Runnable;
/// Lookup table of label spellings to migrate.
///
/// Each entry is `(legacy, blessed)`: the first string is the spelling that is
/// searched for in the source, the second is the text that replaces it.
/// Lookups (`blessed_for_legacy`, `check_label`) compare against the first
/// element with exact string equality.
pub const LEGACY_TO_BLESSED: &[(&str, &str)] = &[
// Bare metadata labels gain a `metadata.` prefix.
("category", "metadata.category"),
("template", "metadata.template"),
("publishing-date", "metadata.publishing-date"),
("front-matter", "metadata.front-matter"),
// `doc.`-prefixed labels lose the prefix.
("doc.table", "table"),
("doc.image", "image"),
("doc.video", "video"),
("doc.audio", "audio"),
];
/// Returns the blessed replacement for `legacy`, if it is listed in
/// [`LEGACY_TO_BLESSED`].
///
/// The comparison is an exact, case-sensitive string match against the first
/// element of each table entry; the first matching entry wins. `None` means
/// `legacy` is not a spelling this module migrates.
pub fn blessed_for_legacy(legacy: &str) -> Option<&'static str> {
    for &(old_spelling, new_spelling) in LEGACY_TO_BLESSED {
        if old_spelling == legacy {
            return Some(new_spelling);
        }
    }
    None
}
/// One label occurrence in the source text that should be rewritten.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct LabelMigration {
/// Byte offsets into the original source covering the label text to
/// replace (surrounding ASCII whitespace is excluded by `check_label`).
pub byte_range: std::ops::Range<usize>,
/// The legacy spelling found at `byte_range` (first element of a
/// `LEGACY_TO_BLESSED` entry).
pub from: &'static str,
/// The replacement spelling (second element of the same entry).
pub to: &'static str,
}
/// Result of running the label migration over one source text.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct MigrationOutcome {
    /// The source text after all migrations have been applied. Equal to the
    /// input when no legacy label was found.
    pub rewritten: String,
    /// Every rewrite site that was discovered.
    pub migrations: Vec<LabelMigration>,
}

impl MigrationOutcome {
    /// Returns `true` when at least one migration site was recorded.
    pub fn is_modified(&self) -> bool {
        let nothing_migrated = self.migrations.is_empty();
        !nothing_migrated
    }
}
pub fn migrate_labels_in_source(src: &str) -> Result<MigrationOutcome, MigrationError> {
let doc = parse_permissive(src).map_err(|e| MigrationError::ParseFailed {
message: e.to_string(),
})?;
let mut sites = Vec::new();
collect_sites(&doc, src, &mut sites);
let rewritten = apply_migrations(src, &sites);
Ok(MigrationOutcome {
rewritten,
migrations: sites,
})
}
/// Parses `src` into a [`Document`] using the permissive label normalizer.
///
/// A trailing `'\n'` is appended to non-empty input that lacks one before
/// lexing. The stages then run in a fixed order: lexing, parsing from flat
/// tokens, inline parsing of the root (and of the title, when present),
/// `AttachRoot`, `AttachAnnotations`, `NormalizeLabels::permissive()`, and
/// `ApplyTableConfig`.
///
/// # Errors
///
/// Propagates any stage failure; a parser failure is wrapped as
/// `TransformError::StageFailed` with stage name `"Parser"`.
fn parse_permissive(src: &str) -> Result<Document, crate::lex::transforms::TransformError> {
    let mut source = src.to_string();
    if !source.is_empty() && !source.ends_with('\n') {
        source.push('\n');
    }

    let tokens = LEXING.run(source.clone())?;
    let parsed = crate::lex::parsing::engine::parse_from_flat_tokens(tokens, &source);
    let mut output = parsed.map_err(|err| crate::lex::transforms::TransformError::StageFailed {
        stage: "Parser".to_string(),
        message: err.to_string(),
    })?;

    output.root = ParseInlines::new().run(output.root)?;
    if let Some(title) = output.title.as_mut() {
        title.content.ensure_inline_parsed();
    }

    let rooted = AttachRoot::new().run(output)?;
    let annotated = AttachAnnotations::new().run(rooted)?;
    let normalized = NormalizeLabels::permissive().run(annotated)?;
    let doc = ApplyTableConfig::new().run(normalized)?;
    Ok(doc)
}
/// Errors reported by the label migration.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum MigrationError {
    /// The source could not be parsed; `message` holds the stringified
    /// underlying error.
    ParseFailed { message: String },
}

impl std::fmt::Display for MigrationError {
    /// Renders the error as `parse failed: <message>`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Single-variant enum: the destructuring is irrefutable and will stop
        // compiling (rather than silently mis-format) if a variant is added.
        let Self::ParseFailed { message } = self;
        write!(f, "parse failed: {message}")
    }
}

impl std::error::Error for MigrationError {}
fn collect_sites(doc: &Document, src: &str, sites: &mut Vec<LabelMigration>) {
for ann in &doc.annotations {
check_label(&ann.data.label, src, sites);
for child in ann.children.iter() {
collect_in_item(child, src, sites);
}
}
for ann in &doc.root.annotations {
check_label(&ann.data.label, src, sites);
for child in ann.children.iter() {
collect_in_item(child, src, sites);
}
}
for item in doc.root.children.iter() {
collect_in_item(item, src, sites);
}
}
/// Collects legacy-label sites from one content item and everything below it.
///
/// Three passes run in order: the item's own label-bearing parts
/// (annotation, verbatim block, or table), then any annotations attached to
/// the item, then the item's generic children as exposed by
/// `ContentItem::children()`.
fn collect_in_item(item: &ContentItem, src: &str, sites: &mut Vec<LabelMigration>) {
    // Variant-specific handling; other variants carry no label of their own.
    if let ContentItem::Annotation(annotation) = item {
        check_annotation(annotation, src, sites);
    } else if let ContentItem::VerbatimBlock(verbatim) = item {
        check_verbatim(verbatim, src, sites);
    } else if let ContentItem::Table(table) = item {
        collect_in_table(table, src, sites);
    }

    // Annotations attached to the item (empty iteration when there are none).
    for annotation in attached_annotations(item).into_iter().flatten() {
        check_annotation(annotation, src, sites);
    }

    // Generic recursion into nested content.
    if let Some(nested) = item.children() {
        for child in nested.iter() {
            collect_in_item(child, src, sites);
        }
    }
}
/// Collects legacy-label sites from a table's cells and footnotes.
///
/// Header rows are visited before body rows; within each cell every block
/// child is walked recursively. When the table has footnotes, their
/// annotations are checked first and their items are walked afterwards.
fn collect_in_table(table: &crate::lex::ast::Table, src: &str, sites: &mut Vec<LabelMigration>) {
    let rows = table.header_rows.iter().chain(table.body_rows.iter());
    for cell in rows.flat_map(|row| row.cells.iter()) {
        for child in cell.children.iter() {
            collect_in_item(child, src, sites);
        }
    }

    let footnotes = match table.footnotes.as_ref() {
        Some(footnotes) => footnotes,
        None => return,
    };
    for annotation in footnotes.annotations.iter() {
        check_annotation(annotation, src, sites);
    }
    for item in footnotes.items.iter() {
        collect_in_item(item, src, sites);
    }
}
/// Checks an annotation's own label, then walks the annotation's children.
fn check_annotation(annotation: &Annotation, src: &str, sites: &mut Vec<LabelMigration>) {
    let own_label = &annotation.data.label;
    check_label(own_label, src, sites);

    let body = annotation.children.iter();
    for nested in body {
        collect_in_item(nested, src, sites);
    }
}
/// Checks the label stored in a verbatim block's closing data.
fn check_verbatim(verbatim: &Verbatim, src: &str, sites: &mut Vec<LabelMigration>) {
    let closing_label = &verbatim.closing_data.label;
    check_label(closing_label, src, sites);
}
/// Returns the annotations attached to `item`, for the variants that carry an
/// `annotations` field (session, paragraph, definition, list, list item,
/// verbatim block, table). Every other variant yields `None`.
fn attached_annotations(item: &ContentItem) -> Option<&Vec<Annotation>> {
    let annotations = match item {
        ContentItem::Session(session) => &session.annotations,
        ContentItem::Paragraph(paragraph) => &paragraph.annotations,
        ContentItem::Definition(definition) => &definition.annotations,
        ContentItem::List(list) => &list.annotations,
        ContentItem::ListItem(list_item) => &list_item.annotations,
        ContentItem::VerbatimBlock(verbatim) => &verbatim.annotations,
        ContentItem::Table(table) => &table.annotations,
        _ => return None,
    };
    Some(annotations)
}
/// Records a migration site when the source text under `label`'s span is a
/// legacy spelling.
///
/// The span is read from `src` (not from `label.value`), surrounding ASCII
/// whitespace is trimmed, and the remainder is compared exactly against the
/// legacy side of `LEGACY_TO_BLESSED`. On a match, a `LabelMigration`
/// covering only the trimmed bytes is pushed onto `sites`.
///
/// Spans that cannot be sliced out of `src` are ignored rather than
/// panicking: reversed ranges, ranges past the end of `src`, and ranges that
/// do not fall on UTF-8 character boundaries.
fn check_label(label: &Label, src: &str, sites: &mut Vec<LabelMigration>) {
    let span = &label.location.span;

    // `str::get` rejects start > end, end > len and non-boundary offsets in
    // one step, so a bad span can never panic here.
    let raw = match src.get(span.start..span.end) {
        Some(raw) => raw,
        None => return,
    };

    // ASCII whitespace is always a single byte, so trimming by `char` moves
    // the offsets by exactly the number of whitespace bytes removed.
    let without_leading = raw.trim_start_matches(|c: char| c.is_ascii_whitespace());
    let slice = without_leading.trim_end_matches(|c: char| c.is_ascii_whitespace());
    if slice.is_empty() {
        return;
    }
    let trim_start = span.start + (raw.len() - without_leading.len());
    let trim_end = trim_start + slice.len();

    if let Some(&(from, to)) = LEGACY_TO_BLESSED
        .iter()
        .find(|(legacy, _)| *legacy == slice)
    {
        // Sanity check (debug builds only): the parsed label value is
        // expected to still carry the legacy spelling seen in the source.
        debug_assert_eq!(
            label.value, from,
            "permissive parse must preserve legacy spelling; got {} for source {slice}",
            label.value
        );
        sites.push(LabelMigration {
            byte_range: trim_start..trim_end,
            from,
            to,
        });
    }
}
/// Returns `src` with every site's byte range replaced by its `to` text.
///
/// The output is assembled in a single forward pass over the sites sorted by
/// position, so each byte of `src` is copied once regardless of how many
/// sites there are.
///
/// Sites are applied at most once per source location:
/// - a site whose byte range duplicates an earlier one is ignored, so the
///   same label is never rewritten twice;
/// - a site that overlaps an already-applied one is ignored;
/// - a site whose range is reversed, out of bounds, or not on UTF-8
///   character boundaries is ignored instead of panicking.
///
/// For well-formed, non-overlapping sites the result is the same as
/// replacing each range independently.
fn apply_migrations(src: &str, sites: &[LabelMigration]) -> String {
    if sites.is_empty() {
        return src.to_string();
    }

    let mut ordered: Vec<&LabelMigration> = sites.iter().collect();
    ordered.sort_by_key(|site| (site.byte_range.start, site.byte_range.end));
    ordered.dedup_by_key(|site| (site.byte_range.start, site.byte_range.end));

    let replacement_bytes: usize = ordered.iter().map(|site| site.to.len()).sum();
    let mut result = String::with_capacity(src.len() + replacement_bytes);

    // `cursor` is the first byte of `src` not yet copied into `result`; it is
    // always 0 or the validated end of the previously applied site.
    let mut cursor = 0;
    for site in ordered {
        let (start, end) = (site.byte_range.start, site.byte_range.end);
        if start < cursor {
            // Overlaps text that has already been replaced.
            continue;
        }
        let untouched = match (src.get(cursor..start), src.get(start..end)) {
            (Some(untouched), Some(_)) => untouched,
            // Range does not describe a valid slice of `src`.
            _ => continue,
        };
        result.push_str(untouched);
        result.push_str(site.to);
        cursor = end;
    }
    result.push_str(&src[cursor..]);
    result
}
// Unit tests for the label migration: end-to-end checks through
// `migrate_labels_in_source`, plus one hand-built AST check of the table walk.
#[cfg(test)]
mod tests {
use super::*;
// A document whose only label is already canonical must round-trip untouched.
#[test]
fn no_legacy_labels_returns_input_unchanged() {
let src = "Hello world.\n\n:: lex.metadata.title :: My Doc\n";
let out = migrate_labels_in_source(src).expect("migrate ok");
assert_eq!(out.rewritten, src);
assert!(out.migrations.is_empty());
assert!(!out.is_modified());
}
// `title`, `author`, `date` and `tags` are not in the legacy table and must
// be left exactly as written.
#[test]
fn blessed_shortcuts_are_not_migrated() {
for shortcut in ["title", "author", "date", "tags"] {
let src = format!(":: {shortcut} :: value\n\nBody.\n");
let out = migrate_labels_in_source(&src).expect("migrate ok");
assert!(
!out.is_modified(),
"shortcut :: {shortcut} :: is the blessed form; must not migrate"
);
assert_eq!(out.rewritten, src);
}
}
// Bare metadata labels from the legacy table gain the `metadata.` prefix.
#[test]
fn non_shortcut_bare_metadata_migrates_to_stripped_form() {
for (legacy, blessed) in [
("category", "metadata.category"),
("template", "metadata.template"),
("publishing-date", "metadata.publishing-date"),
("front-matter", "metadata.front-matter"),
] {
let src = format!(":: {legacy} :: value\n\nBody.\n");
let out = migrate_labels_in_source(&src).unwrap_or_else(|e| {
panic!("migrate failed for {legacy}: {e}");
});
assert!(out.is_modified(), "{legacy} must trigger migration");
assert_eq!(out.migrations[0].from, legacy);
assert_eq!(out.migrations[0].to, blessed);
assert!(
out.rewritten.contains(&format!(":: {blessed} ::")),
"rewritten must contain :: {blessed} ::, got: {}",
out.rewritten
);
}
}
// A `doc.table` closing label is rewritten to `table`, exactly once.
#[test]
fn doc_table_migrates_to_blessed_table_shortcut() {
let src = "Table:\n\n | a | b |\n |---|---|\n | 1 | 2 |\n:: doc.table ::\n";
let out = migrate_labels_in_source(src).expect("migrate ok");
assert!(out.is_modified());
assert_eq!(out.migrations.len(), 1);
assert_eq!(out.migrations[0].from, "doc.table");
assert_eq!(out.migrations[0].to, "table");
assert!(out.rewritten.contains(":: table ::"));
assert!(!out.rewritten.contains(":: doc.table ::"));
}
// The media labels lose their `doc.` prefix; the label here is followed by a
// parameter, so only the `:: {blessed} ` prefix is checked.
#[test]
fn doc_image_video_audio_migrate_to_blessed_shortcuts() {
for (legacy, blessed) in [
("doc.image", "image"),
("doc.video", "video"),
("doc.audio", "audio"),
] {
let src = format!("Media:\n caption\n:: {legacy} src=file ::\n");
let out = migrate_labels_in_source(&src).expect("migrate ok");
assert!(out.is_modified(), ":: {legacy} :: must trigger migration");
assert_eq!(out.migrations[0].from, legacy);
assert_eq!(out.migrations[0].to, blessed);
assert!(
out.rewritten.contains(&format!(":: {blessed} ")),
"expected blessed :: {blessed} :: in {}",
out.rewritten
);
}
}
// Two replacements of different lengths in one source: both must land on the
// right text even though the first rewrite changes the length of the string.
#[test]
fn multiple_legacy_labels_all_rewrite_with_correct_offsets() {
let src = ":: category :: tech\n:: template :: x\n\nBody.\n";
let out = migrate_labels_in_source(src).expect("migrate ok");
assert_eq!(
out.migrations.len(),
2,
"two legacy labels must produce two migrations: {:?}",
out.migrations
);
assert!(out.rewritten.contains(":: metadata.category ::"));
assert!(out.rewritten.contains(":: metadata.template ::"));
assert!(!out.rewritten.contains(":: category ::"));
assert!(!out.rewritten.contains(":: template ::"));
}
// Labels that are not in the legacy table are never touched.
#[test]
fn non_legacy_labels_are_left_alone() {
let src = ":: acme.custom param=value :: body\n\nBody.\n";
let out = migrate_labels_in_source(src).expect("migrate ok");
assert!(!out.is_modified());
assert_eq!(out.rewritten, src);
}
// Fully qualified `lex.`-prefixed labels are not legacy spellings.
#[test]
fn already_canonical_labels_are_left_alone() {
let src = ":: lex.metadata.title :: My Doc\n:: lex.media.image src=x ::\n";
let out = migrate_labels_in_source(src).expect("migrate ok");
assert!(!out.is_modified(), "canonical labels must not be migrated");
assert_eq!(out.rewritten, src);
}
// Legacy words appearing in ordinary paragraph text are not labels.
#[test]
fn body_text_containing_legacy_words_is_not_rewritten() {
let src = "This paragraph mentions the category and template words.\n";
let out = migrate_labels_in_source(src).expect("migrate ok");
assert!(!out.is_modified(), "body words must not be rewritten");
assert_eq!(out.rewritten, src);
}
// Builds a document by hand (no parser involved) with a legacy-labelled
// annotation nested inside a table cell, and checks that `collect_sites`
// finds it through the table walk.
#[test]
fn collect_in_table_recurses_into_cell_block_children() {
use crate::lex::ast::elements::annotation::Annotation;
use crate::lex::ast::elements::data::Data;
use crate::lex::ast::elements::label::Label;
use crate::lex::ast::elements::table::{Table, TableCell, TableRow};
use crate::lex::ast::elements::typed_content::ContentElement;
use crate::lex::ast::elements::verbatim::VerbatimBlockMode;
use crate::lex::ast::range::{Position, Range as AstRange};
use crate::lex::ast::text_content::TextContent;
use crate::lex::ast::Document as LexDocument;
// Bytes 3..11 of this source are exactly the text `category`.
let src = ":: category ::\n";
let label_span = std::ops::Range { start: 3, end: 11 };
let label = Label {
value: "category".to_string(),
location: AstRange::new(label_span, Position::new(0, 3), Position::new(0, 11)),
form: crate::lex::ast::elements::label::LabelForm::Canonical,
};
let inner_annotation = Annotation::from_data(Data::new(label, Vec::new()), Vec::new());
let cell = TableCell::new(TextContent::from_string("cell".into(), None))
.with_children(vec![ContentElement::Annotation(inner_annotation)]);
let row = TableRow::new(vec![cell]);
let table = Table::new(
TextContent::from_string("Data".into(), None),
Vec::new(),
vec![row],
VerbatimBlockMode::Inflow,
);
let mut doc = LexDocument::new();
doc.root
.children
.as_mut_vec()
.push(ContentItem::Table(Box::new(table)));
let mut sites = Vec::new();
collect_sites(&doc, src, &mut sites);
assert_eq!(
sites.len(),
1,
"legacy annotation inside a table cell's block children must be discovered"
);
assert_eq!(sites[0].from, "category");
assert_eq!(sites[0].to, "metadata.category");
assert_eq!(sites[0].byte_range, 3..11);
}
// The reported byte range must slice the original source to the legacy text.
#[test]
fn migrations_have_correct_byte_ranges() {
let src = ":: category :: foo\n\nBody.\n";
let out = migrate_labels_in_source(src).expect("migrate ok");
let m = &out.migrations[0];
let slice = &src[m.byte_range.clone()];
assert_eq!(slice, m.from, "byte range must point at the legacy text");
}
}