1use crate::lex::assembling::stages::{
32 ApplyTableConfig, AttachAnnotations, AttachRoot, NormalizeLabels,
33};
34use crate::lex::ast::elements::annotation::Annotation;
35use crate::lex::ast::elements::content_item::ContentItem;
36use crate::lex::ast::elements::label::Label;
37use crate::lex::ast::elements::verbatim::Verbatim;
38use crate::lex::ast::Document;
39use crate::lex::transforms::stages::ParseInlines;
40use crate::lex::transforms::standard::LEXING;
41use crate::lex::transforms::Runnable;
42
/// Legacy label spellings paired with the blessed spelling that replaces them.
///
/// Each entry is `(legacy, blessed)`. Lookups in this file compare the legacy
/// side for exact string equality.
pub const LEGACY_TO_BLESSED: &[(&str, &str)] = &[
    // Bare metadata labels gain a `metadata.` prefix.
    ("category", "metadata.category"),
    ("template", "metadata.template"),
    ("publishing-date", "metadata.publishing-date"),
    ("front-matter", "metadata.front-matter"),
    // `doc.`-prefixed labels drop the prefix.
    ("doc.table", "table"),
    ("doc.image", "image"),
    ("doc.video", "video"),
    ("doc.audio", "audio"),
];
64
65pub fn blessed_for_legacy(legacy: &str) -> Option<&'static str> {
69 LEGACY_TO_BLESSED
70 .iter()
71 .find(|(l, _)| *l == legacy)
72 .map(|(_, b)| *b)
73}
74
/// One legacy label occurrence found in a source text, with its replacement.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct LabelMigration {
    /// Byte range in the source that holds the legacy label text, with
    /// surrounding ASCII whitespace excluded.
    pub byte_range: std::ops::Range<usize>,
    /// The legacy spelling found at `byte_range`.
    pub from: &'static str,
    /// The blessed spelling that replaces `from`.
    pub to: &'static str,
}
85
86#[derive(Debug, Clone, PartialEq, Eq)]
88pub struct MigrationOutcome {
89 pub rewritten: String,
93 pub migrations: Vec<LabelMigration>,
96}
97
98impl MigrationOutcome {
99 pub fn is_modified(&self) -> bool {
103 !self.migrations.is_empty()
104 }
105}
106
107pub fn migrate_labels_in_source(src: &str) -> Result<MigrationOutcome, MigrationError> {
115 let doc = parse_permissive(src).map_err(|e| MigrationError::ParseFailed {
120 message: e.to_string(),
121 })?;
122
123 let mut sites = Vec::new();
124 collect_sites(&doc, src, &mut sites);
125
126 let rewritten = apply_migrations(src, &sites);
127 Ok(MigrationOutcome {
128 rewritten,
129 migrations: sites,
130 })
131}
132
133fn parse_permissive(src: &str) -> Result<Document, crate::lex::transforms::TransformError> {
138 let source = if !src.is_empty() && !src.ends_with('\n') {
139 format!("{src}\n")
140 } else {
141 src.to_string()
142 };
143 let tokens = LEXING.run(source.clone())?;
144 let mut output =
145 crate::lex::parsing::engine::parse_from_flat_tokens(tokens, &source).map_err(|e| {
146 crate::lex::transforms::TransformError::StageFailed {
147 stage: "Parser".to_string(),
148 message: e.to_string(),
149 }
150 })?;
151 output.root = ParseInlines::new().run(output.root)?;
152 if let Some(ref mut title) = output.title {
153 title.content.ensure_inline_parsed();
154 }
155 let mut doc = AttachRoot::new().run(output)?;
156 doc = AttachAnnotations::new().run(doc)?;
157 doc = NormalizeLabels::permissive().run(doc)?;
158 doc = ApplyTableConfig::new().run(doc)?;
159 Ok(doc)
160}
161
/// Error returned by the label migration entry point.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum MigrationError {
    /// The source could not be parsed; `message` holds the underlying
    /// error's display text.
    ParseFailed { message: String },
}

impl std::fmt::Display for MigrationError {
    /// Writes `parse failed: <message>` for [`MigrationError::ParseFailed`].
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            MigrationError::ParseFailed { message } => {
                f.write_str("parse failed: ")?;
                f.write_str(message)
            }
        }
    }
}

impl std::error::Error for MigrationError {}
179
180fn collect_sites(doc: &Document, src: &str, sites: &mut Vec<LabelMigration>) {
181 for ann in &doc.annotations {
182 check_label(&ann.data.label, src, sites);
183 for child in ann.children.iter() {
184 collect_in_item(child, src, sites);
185 }
186 }
187 for ann in &doc.root.annotations {
188 check_label(&ann.data.label, src, sites);
189 for child in ann.children.iter() {
190 collect_in_item(child, src, sites);
191 }
192 }
193 for item in doc.root.children.iter() {
194 collect_in_item(item, src, sites);
195 }
196}
197
198fn collect_in_item(item: &ContentItem, src: &str, sites: &mut Vec<LabelMigration>) {
199 match item {
200 ContentItem::Annotation(a) => check_annotation(a, src, sites),
201 ContentItem::VerbatimBlock(v) => check_verbatim(v, src, sites),
202 ContentItem::Table(t) => collect_in_table(t, src, sites),
203 _ => {}
204 }
205 if let Some(attached) = attached_annotations(item) {
206 for ann in attached.iter() {
207 check_annotation(ann, src, sites);
208 }
209 }
210 if let Some(children) = item.children() {
211 for child in children.iter() {
212 collect_in_item(child, src, sites);
213 }
214 }
215}
216
217fn collect_in_table(table: &crate::lex::ast::Table, src: &str, sites: &mut Vec<LabelMigration>) {
218 for row in table.header_rows.iter().chain(table.body_rows.iter()) {
226 for cell in row.cells.iter() {
227 for child in cell.children.iter() {
228 collect_in_item(child, src, sites);
229 }
230 }
231 }
232 if let Some(footnotes) = table.footnotes.as_ref() {
233 for ann in footnotes.annotations.iter() {
234 check_annotation(ann, src, sites);
235 }
236 for item in footnotes.items.iter() {
237 collect_in_item(item, src, sites);
238 }
239 }
240}
241
242fn check_annotation(annotation: &Annotation, src: &str, sites: &mut Vec<LabelMigration>) {
243 check_label(&annotation.data.label, src, sites);
244 for child in annotation.children.iter() {
245 collect_in_item(child, src, sites);
246 }
247}
248
249fn check_verbatim(verbatim: &Verbatim, src: &str, sites: &mut Vec<LabelMigration>) {
250 check_label(&verbatim.closing_data.label, src, sites);
251}
252
253fn attached_annotations(item: &ContentItem) -> Option<&Vec<Annotation>> {
254 match item {
255 ContentItem::Session(s) => Some(&s.annotations),
256 ContentItem::Paragraph(p) => Some(&p.annotations),
257 ContentItem::Definition(d) => Some(&d.annotations),
258 ContentItem::List(l) => Some(&l.annotations),
259 ContentItem::ListItem(li) => Some(&li.annotations),
260 ContentItem::VerbatimBlock(v) => Some(&v.annotations),
261 ContentItem::Table(t) => Some(&t.annotations),
262 _ => None,
263 }
264}
265
266fn check_label(label: &Label, src: &str, sites: &mut Vec<LabelMigration>) {
267 let span = &label.location.span;
279 let start = span.start;
280 let end = span.end;
281 if start > end || end > src.len() {
282 return;
285 }
286 let raw = &src[start..end];
287 let leading_ws = raw.bytes().take_while(|b| b.is_ascii_whitespace()).count();
288 let trailing_ws = raw
289 .bytes()
290 .rev()
291 .take_while(|b| b.is_ascii_whitespace())
292 .count();
293 let trim_start = start + leading_ws;
294 let trim_end = end.saturating_sub(trailing_ws);
295 if trim_start >= trim_end {
296 return;
297 }
298 let slice = &src[trim_start..trim_end];
299 if let Some((from, to)) = LEGACY_TO_BLESSED
300 .iter()
301 .find(|(legacy, _)| *legacy == slice)
302 {
303 debug_assert_eq!(
306 label.value, *from,
307 "permissive parse must preserve legacy spelling; got {} for source {slice}",
308 label.value
309 );
310 sites.push(LabelMigration {
311 byte_range: trim_start..trim_end,
312 from,
313 to,
314 });
315 }
316}
317
318fn apply_migrations(src: &str, sites: &[LabelMigration]) -> String {
319 if sites.is_empty() {
320 return src.to_string();
321 }
322 let mut result = src.to_string();
326 let mut sorted: Vec<&LabelMigration> = sites.iter().collect();
327 sorted.sort_by(|a, b| b.byte_range.start.cmp(&a.byte_range.start));
328 for site in sorted {
329 result.replace_range(site.byte_range.clone(), site.to);
330 }
331 result
332}
333
/// Tests for the label migration: most go through `migrate_labels_in_source`,
/// one drives `collect_sites` with a hand-built document.
#[cfg(test)]
mod tests {
    use super::*;

    /// A source with no legacy label comes back unchanged with no migrations.
    #[test]
    fn no_legacy_labels_returns_input_unchanged() {
        let src = "Hello world.\n\n:: lex.metadata.title :: My Doc\n";
        let out = migrate_labels_in_source(src).expect("migrate ok");
        assert_eq!(out.rewritten, src);
        assert!(out.migrations.is_empty());
        assert!(!out.is_modified());
    }

    /// `title`, `author`, `date` and `tags` are not in `LEGACY_TO_BLESSED`,
    /// so sources using them must pass through untouched.
    #[test]
    fn blessed_shortcuts_are_not_migrated() {
        for shortcut in ["title", "author", "date", "tags"] {
            let src = format!(":: {shortcut} :: value\n\nBody.\n");
            let out = migrate_labels_in_source(&src).expect("migrate ok");
            assert!(
                !out.is_modified(),
                "shortcut :: {shortcut} :: is the blessed form; must not migrate"
            );
            assert_eq!(out.rewritten, src);
        }
    }

    /// Each bare metadata label in the table is rewritten to its
    /// `metadata.`-prefixed form.
    #[test]
    fn non_shortcut_bare_metadata_migrates_to_stripped_form() {
        for (legacy, blessed) in [
            ("category", "metadata.category"),
            ("template", "metadata.template"),
            ("publishing-date", "metadata.publishing-date"),
            ("front-matter", "metadata.front-matter"),
        ] {
            let src = format!(":: {legacy} :: value\n\nBody.\n");
            let out = migrate_labels_in_source(&src).unwrap_or_else(|e| {
                panic!("migrate failed for {legacy}: {e}");
            });
            assert!(out.is_modified(), "{legacy} must trigger migration");
            assert_eq!(out.migrations[0].from, legacy);
            assert_eq!(out.migrations[0].to, blessed);
            assert!(
                out.rewritten.contains(&format!(":: {blessed} ::")),
                "rewritten must contain :: {blessed} ::, got: {}",
                out.rewritten
            );
        }
    }

    /// A `doc.table` label closing a table block is rewritten to `table`,
    /// producing exactly one migration.
    #[test]
    fn doc_table_migrates_to_blessed_table_shortcut() {
        let src = "Table:\n\n | a | b |\n |---|---|\n | 1 | 2 |\n:: doc.table ::\n";
        let out = migrate_labels_in_source(src).expect("migrate ok");
        assert!(out.is_modified());
        assert_eq!(out.migrations.len(), 1);
        assert_eq!(out.migrations[0].from, "doc.table");
        assert_eq!(out.migrations[0].to, "table");
        assert!(out.rewritten.contains(":: table ::"));
        assert!(!out.rewritten.contains(":: doc.table ::"));
    }

    /// The `doc.image`, `doc.video` and `doc.audio` labels are rewritten to
    /// their bare forms even when parameters follow the label.
    #[test]
    fn doc_image_video_audio_migrate_to_blessed_shortcuts() {
        for (legacy, blessed) in [
            ("doc.image", "image"),
            ("doc.video", "video"),
            ("doc.audio", "audio"),
        ] {
            let src = format!("Media:\n caption\n:: {legacy} src=file ::\n");
            let out = migrate_labels_in_source(&src).expect("migrate ok");
            assert!(out.is_modified(), ":: {legacy} :: must trigger migration");
            assert_eq!(out.migrations[0].from, legacy);
            assert_eq!(out.migrations[0].to, blessed);
            assert!(
                out.rewritten.contains(&format!(":: {blessed} ")),
                "expected blessed :: {blessed} :: in {}",
                out.rewritten
            );
        }
    }

    /// Two legacy labels in one source are both rewritten; the replacements
    /// differ in length from the originals, so this also exercises offset
    /// handling across multiple sites.
    #[test]
    fn multiple_legacy_labels_all_rewrite_with_correct_offsets() {
        let src = ":: category :: tech\n:: template :: x\n\nBody.\n";
        let out = migrate_labels_in_source(src).expect("migrate ok");
        assert_eq!(
            out.migrations.len(),
            2,
            "two legacy labels must produce two migrations: {:?}",
            out.migrations
        );
        assert!(out.rewritten.contains(":: metadata.category ::"));
        assert!(out.rewritten.contains(":: metadata.template ::"));
        assert!(!out.rewritten.contains(":: category ::"));
        assert!(!out.rewritten.contains(":: template ::"));
    }

    /// A label that is not in the legacy table is left as written.
    #[test]
    fn non_legacy_labels_are_left_alone() {
        let src = ":: acme.custom param=value :: body\n\nBody.\n";
        let out = migrate_labels_in_source(src).expect("migrate ok");
        assert!(!out.is_modified());
        assert_eq!(out.rewritten, src);
    }

    /// Fully qualified `lex.`-prefixed labels are not touched.
    #[test]
    fn already_canonical_labels_are_left_alone() {
        let src = ":: lex.metadata.title :: My Doc\n:: lex.media.image src=x ::\n";
        let out = migrate_labels_in_source(src).expect("migrate ok");
        assert!(!out.is_modified(), "canonical labels must not be migrated");
        assert_eq!(out.rewritten, src);
    }

    /// Legacy words appearing in ordinary paragraph text are not labels and
    /// must not be rewritten.
    #[test]
    fn body_text_containing_legacy_words_is_not_rewritten() {
        let src = "This paragraph mentions the category and template words.\n";
        let out = migrate_labels_in_source(src).expect("migrate ok");
        assert!(!out.is_modified(), "body words must not be rewritten");
        assert_eq!(out.rewritten, src);
    }

    /// Builds a document by hand (no parsing) with a legacy-labelled
    /// annotation inside a table cell's block children, and checks that
    /// `collect_sites` reaches it.
    #[test]
    fn collect_in_table_recurses_into_cell_block_children() {
        use crate::lex::ast::elements::annotation::Annotation;
        use crate::lex::ast::elements::data::Data;
        use crate::lex::ast::elements::label::Label;
        use crate::lex::ast::elements::table::{Table, TableCell, TableRow};
        use crate::lex::ast::elements::typed_content::ContentElement;
        use crate::lex::ast::elements::verbatim::VerbatimBlockMode;
        use crate::lex::ast::range::{Position, Range as AstRange};
        use crate::lex::ast::text_content::TextContent;
        use crate::lex::ast::Document as LexDocument;

        let src = ":: category ::\n";
        // Bytes 3..11 of `src` are the text `category`.
        let label_span = std::ops::Range { start: 3, end: 11 };
        let label = Label {
            value: "category".to_string(),
            location: AstRange::new(label_span, Position::new(0, 3), Position::new(0, 11)),
            form: crate::lex::ast::elements::label::LabelForm::Canonical,
        };
        let inner_annotation = Annotation::from_data(Data::new(label, Vec::new()), Vec::new());

        let cell = TableCell::new(TextContent::from_string("cell".into(), None))
            .with_children(vec![ContentElement::Annotation(inner_annotation)]);
        let row = TableRow::new(vec![cell]);
        let table = Table::new(
            TextContent::from_string("Data".into(), None),
            Vec::new(),
            vec![row],
            VerbatimBlockMode::Inflow,
        );

        let mut doc = LexDocument::new();
        doc.root
            .children
            .as_mut_vec()
            .push(ContentItem::Table(Box::new(table)));

        let mut sites = Vec::new();
        collect_sites(&doc, src, &mut sites);

        assert_eq!(
            sites.len(),
            1,
            "legacy annotation inside a table cell's block children must be discovered"
        );
        assert_eq!(sites[0].from, "category");
        assert_eq!(sites[0].to, "metadata.category");
        assert_eq!(sites[0].byte_range, 3..11);
    }

    /// The reported byte range, applied to the original source, yields
    /// exactly the legacy spelling.
    #[test]
    fn migrations_have_correct_byte_ranges() {
        let src = ":: category :: foo\n\nBody.\n";
        let out = migrate_labels_in_source(src).expect("migrate ok");
        let m = &out.migrations[0];
        let slice = &src[m.byte_range.clone()];
        assert_eq!(slice, m.from, "byte range must point at the legacy text");
    }
}