markdown2pdf 1.3.0

Create PDFs from Markdown files (a Markdown-to-PDF transpiler)
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
//! Direct lexer tests for GFM footnotes. Covers inline references,
//! block definitions, and the edge cases where the `[^...]` syntax
//! could collide with regular link parsing.

use super::common::parse;
use markdown2pdf::markdown::Token;

/// Collects the label of every `Token::FootnoteReference` reachable from
/// `tokens`, in depth-first document order.
///
/// Only the container variants named in the match below are descended
/// into; every other variant is skipped without inspecting its contents.
fn refs_of(tokens: &[Token]) -> Vec<String> {
    // Recursive worker: appends the labels found in `nodes`, and in the
    // children of the known container variants, to `labels`.
    fn gather(nodes: &[Token], labels: &mut Vec<String>) {
        for node in nodes {
            match node {
                Token::FootnoteReference(label) => labels.push(label.clone()),
                Token::Heading(children, _)
                | Token::Emphasis { content: children, .. }
                | Token::StrongEmphasis(children)
                | Token::Strikethrough(children)
                | Token::BlockQuote(children)
                | Token::ListItem { content: children, .. }
                | Token::Link { content: children, .. }
                | Token::Image { alt: children, .. }
                | Token::FootnoteDefinition { content: children, .. } => {
                    gather(children, labels)
                }
                _ => {}
            }
        }
    }

    let mut labels = Vec::new();
    gather(tokens, &mut labels);
    labels
}

/// Returns one `(label, text)` pair for each top-level
/// `Token::FootnoteDefinition` in `tokens`, preserving input order.
///
/// The text half is whatever `Token::collect_all_text` produces for the
/// definition's content. Definitions nested inside other tokens are not
/// visited.
fn defs_of(tokens: &[Token]) -> Vec<(String, String)> {
    tokens
        .iter()
        .filter_map(|token| match token {
            Token::FootnoteDefinition { label, content } => {
                Some((label.clone(), Token::collect_all_text(content)))
            }
            _ => None,
        })
        .collect()
}

/// A purely numeric label produces exactly one footnote reference.
#[test]
fn reference_with_numeric_label() {
    let lexed = parse("Text[^1].");
    let labels = refs_of(&lexed);
    assert_eq!(labels, ["1"]);
}

/// A purely alphabetic label produces exactly one footnote reference.
#[test]
fn reference_with_alphabetic_label() {
    let lexed = parse("Text[^note].");
    let expected = vec![String::from("note")];
    assert_eq!(refs_of(&lexed), expected);
}

/// A label mixing letters and digits is kept verbatim.
#[test]
fn reference_with_alphanumeric_label() {
    let lexed = parse("Text[^a1b2].");
    let labels = refs_of(&lexed);
    assert_eq!(labels, ["a1b2"]);
}

/// A dash is accepted inside a footnote label.
#[test]
fn reference_with_dash_in_label() {
    let lexed = parse("Text[^my-note].");
    let expected = vec![String::from("my-note")];
    assert_eq!(refs_of(&lexed), expected);
}

/// An underscore is accepted inside a footnote label.
#[test]
fn reference_with_underscore_in_label() {
    let lexed = parse("Text[^a_b].");
    let labels = refs_of(&lexed);
    assert_eq!(labels, ["a_b"]);
}

/// A `[^1]` at the very start of a line with no `:` after it is an
/// inline reference, not a definition.
#[test]
fn reference_at_start_of_line_is_parsed() {
    // At block start the definition form is attempted first; since no
    // `:` follows the closing bracket, the inline-reference path is
    // taken instead.
    let lexed = parse("[^1] starts the line.");
    let expected = vec![String::from("1")];
    assert_eq!(refs_of(&lexed), expected);
}

/// Several references on one line are all emitted, in source order.
#[test]
fn multiple_references_on_one_line() {
    let lexed = parse("First[^1] then[^2] and[^3].");
    let labels = refs_of(&lexed);
    assert_eq!(labels, ["1", "2", "3"]);
}

/// Referencing the same label twice yields two reference tokens.
#[test]
fn repeated_reference_with_same_label() {
    // No deduplication at the lexer level: each occurrence is emitted.
    // Numbering and deduplication are left to a later stage.
    let lexed = parse("Body[^1] then again[^1].");
    let labels = refs_of(&lexed);
    assert_eq!(labels, ["1", "1"]);
}

/// A reference nested inside `*emphasis*` is still found.
#[test]
fn reference_inside_emphasis() {
    let lexed = parse("*emphasized text[^1] inside*");
    let expected = vec![String::from("1")];
    assert_eq!(refs_of(&lexed), expected);
}

/// A reference nested inside `**strong emphasis**` is still found.
#[test]
fn reference_inside_strong_emphasis() {
    let lexed = parse("**bold text[^1] inside**");
    let labels = refs_of(&lexed);
    assert_eq!(labels, ["1"]);
}

/// `[^]` carries no label characters and must not become a reference.
#[test]
fn empty_label_falls_back_to_text() {
    // Nothing sits between `^` and `]`, so no FootnoteReference is
    // expected in the output.
    let lexed = parse("Text [^] more.");
    let no_refs: Vec<String> = Vec::new();
    assert_eq!(refs_of(&lexed), no_refs);
}

/// A `[^1` that is never closed by `]` must not become a reference.
#[test]
fn unclosed_reference_falls_back_to_text() {
    let lexed = parse("Text [^1 missing close.");
    let no_refs: Vec<String> = Vec::new();
    assert_eq!(refs_of(&lexed), no_refs);
}

/// A label containing a character outside the allowed set is rejected.
#[test]
fn label_with_invalid_chars_falls_back() {
    // `!` is not permitted in a label. The footnote parse fails, the
    // bracket is handed to link parsing, that fails too, and the input
    // ends up as literal text.
    let lexed = parse("Text [^a!b] more.");
    let no_refs: Vec<String> = Vec::new();
    assert_eq!(refs_of(&lexed), no_refs);
}

/// An ordinary `[text](url)` link is untouched by footnote handling.
#[test]
fn link_without_caret_unaffected() {
    let lexed = parse("[just a link](https://example.com)");

    // The input contains no `[^...]`, so there is nothing to collect.
    let no_refs: Vec<String> = Vec::new();
    assert_eq!(refs_of(&lexed), no_refs);

    // A top-level Link token must still be produced.
    let mut has_link = false;
    for token in &lexed {
        if let Token::Link { .. } = token {
            has_link = true;
            break;
        }
    }
    assert!(has_link, "regular link parsing broke");
}

/// Punctuation directly after the closing `]` does not disturb the reference.
#[test]
fn reference_followed_by_punctuation() {
    let lexed = parse("Sentence ending with note[^1], comma.");
    let labels = refs_of(&lexed);
    assert_eq!(labels, ["1"]);
}

/// A one-line `[^1]: ...` definition yields one definition token with
/// the expected label and body text.
#[test]
fn definition_with_simple_content() {
    let lexed = parse("[^1]: First definition");
    let found = defs_of(&lexed);
    assert_eq!(found.len(), 1);
    let (label, body) = &found[0];
    assert_eq!(label, "1");
    assert!(body.contains("First definition"));
}

/// Definition labels made of letters, or of letters mixed with digits,
/// are preserved verbatim.
#[test]
fn definition_with_alphanumeric_label() {
    // `abc` is the original, purely alphabetic case. `a1b2` actually
    // mixes letters and digits, which is what the test name promises;
    // it mirrors the label used by `reference_with_alphanumeric_label`.
    for label in ["abc", "a1b2"] {
        let src = format!("[^{}]: Some text", label);
        let tokens = parse(&src);
        let defs = defs_of(&tokens);
        assert_eq!(defs.len(), 1, "expected one definition for label `{}`", label);
        assert_eq!(defs[0].0, label);
    }
}

/// Inline emphasis inside a definition body is lexed into an
/// `Emphasis` token rather than being left as raw text.
#[test]
fn definition_body_inline_emphasis_is_parsed() {
    let lexed = parse("[^1]: Definition with *emphasis* in it");

    // Inspect the first top-level definition; its absence is a failure.
    let (label, content) = lexed
        .iter()
        .find_map(|token| match token {
            Token::FootnoteDefinition { label, content } => Some((label, content)),
            _ => None,
        })
        .expect("no FootnoteDefinition emitted");

    assert_eq!(label, "1");
    let is_emphasis = |child: &Token| matches!(child, Token::Emphasis { .. });
    let has_emphasis = content.iter().any(is_emphasis);
    assert!(has_emphasis, "expected parsed Emphasis token in definition body, got {:?}", content);
}

/// `[^1]: ...` in the middle of a paragraph is not a definition.
#[test]
fn definition_only_at_line_start() {
    // The definition form is only recognized at the start of a line.
    let lexed = parse("Body text [^1]: not a def");
    let no_defs: Vec<(String, String)> = Vec::new();
    assert_eq!(defs_of(&lexed), no_defs);
}

/// Consecutive definition lines each produce their own token, in order.
#[test]
fn multiple_definitions_each_become_a_token() {
    let lexed = parse("[^1]: First\n[^2]: Second\n[^abc]: Third");
    let found = defs_of(&lexed);
    assert_eq!(found.len(), 3);
    let labels: Vec<&str> = found.iter().map(|(label, _)| label.as_str()).collect();
    assert_eq!(labels, ["1", "2", "abc"]);
}

/// A link inside a definition body is lexed into a `Link` token that
/// carries the expected URL.
#[test]
fn definition_body_link_is_parsed() {
    let lexed = parse("[^1]: See [example](https://example.com)");

    // Inspect the first top-level definition; its absence is a failure.
    let (label, content) = lexed
        .iter()
        .find_map(|token| match token {
            Token::FootnoteDefinition { label, content } => Some((label, content)),
            _ => None,
        })
        .expect("no FootnoteDefinition emitted");

    assert_eq!(label, "1");
    // URL of the first Link token directly inside the body, if any.
    let first_url = content
        .iter()
        .filter_map(|child| match child {
            Token::Link { url, .. } => Some(url.as_str()),
            _ => None,
        })
        .next();
    assert_eq!(first_url, Some("https://example.com"));
}

/// A document holding both a reference and its definition yields one of each.
#[test]
fn reference_and_definition_in_same_document() {
    let lexed = parse("Body text[^1].\n[^1]: Note");
    assert_eq!(refs_of(&lexed), ["1"]);
    let found = defs_of(&lexed);
    assert_eq!(found.len(), 1);
    let (label, _) = &found[0];
    assert_eq!(label, "1");
}

/// A definition that nothing references is still emitted.
#[test]
fn unused_definition_still_lexed() {
    let lexed = parse("[^orphan]: Nobody references me");
    let found = defs_of(&lexed);
    assert_eq!(found.len(), 1);
    let (label, _) = &found[0];
    assert_eq!(label, "orphan");
}

/// A reference may appear before its definition, with unrelated
/// paragraphs in between; both are still lexed.
#[test]
fn forward_reference_before_definition() {
    let tokens = parse("Body[^later].\n\nMore body.\n\n[^later]: definition");
    assert_eq!(refs_of(&tokens), vec!["later".to_string()]);
    let defs = defs_of(&tokens);
    assert_eq!(defs.len(), 1);
    // Pin the label as well as the count, so a definition lexed under
    // the wrong label cannot slip through.
    assert_eq!(defs[0].0, "later");
}

/// `[^1]:` with nothing after the colon is still a definition.
#[test]
fn definition_with_empty_body_lexes() {
    let lexed = parse("[^1]:");
    let found = defs_of(&lexed);
    assert_eq!(found.len(), 1);
    let (label, body) = &found[0];
    assert_eq!(label, "1");
    // The body may be empty or hold whitespace only.
    assert!(body.chars().all(char::is_whitespace));
}

/// A 4-space-indented line after a definition continues its body.
#[test]
fn multiline_definition_joins_indented_continuation() {
    // GFM: continuation lines indented by four spaces belong to the
    // same definition body and are joined with a soft space.
    let lexed = parse("[^1]: First line.\n    Second line continues.");
    let found = defs_of(&lexed);
    assert_eq!(found.len(), 1);
    let body = &found[0].1;
    let has_first = body.contains("First line.");
    let has_second = body.contains("Second line continues.");
    assert!(
        has_first && has_second,
        "multi-line body lost content: {:?}",
        body
    );
}

/// Three consecutive continuation lines all end up in the same body.
#[test]
fn multiline_definition_supports_three_continuation_lines() {
    let lexed = parse("[^1]: Line one.\n    Line two.\n    Line three.\n    Line four.");
    let found = defs_of(&lexed);
    assert_eq!(found.len(), 1);
    let body = &found[0].1;
    for expected in ["Line one.", "Line two.", "Line three.", "Line four."] {
        let present = body.contains(expected);
        assert!(
            present,
            "multi-line body missing `{}` (got {:?})",
            expected,
            body
        );
    }
}

/// A blank line ends the definition body.
#[test]
fn multiline_definition_stops_at_blank_line() {
    // Text after the blank line is an ordinary paragraph and must not
    // be folded into the footnote.
    let lexed = parse("[^1]: Inside footnote.\n\nNot in footnote.");
    let found = defs_of(&lexed);
    assert_eq!(found.len(), 1);
    let body = &found[0].1;
    assert!(body.contains("Inside footnote."));
    let leaked = body.contains("Not in footnote");
    assert!(
        !leaked,
        "blank line should have ended the body: {:?}",
        body
    );
}

/// A following line with no indentation is not part of the body.
#[test]
fn multiline_definition_stops_at_unindented_line() {
    let lexed = parse("[^1]: First line.\nNot indented.");
    let found = defs_of(&lexed);
    assert_eq!(found.len(), 1);
    let body = &found[0].1;
    assert!(body.contains("First line."));
    let leaked = body.contains("Not indented");
    assert!(
        !leaked,
        "non-indented line should have ended the body: {:?}",
        body
    );
}

/// A tab-indented line continues the definition body.
#[test]
fn multiline_definition_indent_can_be_tab() {
    // Per GFM, one leading tab counts as at least four columns of
    // indentation.
    let lexed = parse("[^1]: First line.\n\tSecond line via tab.");
    let found = defs_of(&lexed);
    assert_eq!(found.len(), 1);
    let body = &found[0].1;
    let has_first = body.contains("First line.");
    let has_second = body.contains("Second line via tab.");
    assert!(
        has_first && has_second,
        "tab continuation not joined: {:?}",
        body
    );
}

/// Three spaces of indentation are not enough to continue a body.
#[test]
fn multiline_definition_indent_requires_four_spaces() {
    // With only a 3-space indent the second line is not a
    // continuation, so the body is the first line alone.
    let lexed = parse("[^1]: First line.\n   Three-space indent.");
    let found = defs_of(&lexed);
    assert_eq!(found.len(), 1);
    let body = &found[0].1;
    assert!(body.contains("First line."));
    let leaked = body.contains("Three-space indent");
    assert!(
        !leaked,
        "3-space indent should NOT be a continuation: {:?}",
        body
    );
}

/// Inline markup on a continuation line is lexed, not kept as raw text.
#[test]
fn multiline_definition_continuation_runs_inline_lexer() {
    // Continuation lines should get the same inline treatment
    // (emphasis, links, code) as the first line of the definition.
    let lexed = parse("[^1]: First.\n    Second with *emphasis* and `code`.");

    // Inspect the first top-level definition; its absence is a failure.
    let (label, content) = lexed
        .iter()
        .find_map(|token| match token {
            Token::FootnoteDefinition { label, content } => Some((label, content)),
            _ => None,
        })
        .expect("no FootnoteDefinition emitted");

    assert_eq!(label, "1");
    let is_emphasis = |child: &Token| matches!(child, Token::Emphasis { .. });
    let is_inline_code = |child: &Token| matches!(child, Token::Code { block: false, .. });
    let has_emphasis = content.iter().any(is_emphasis);
    let has_code = content.iter().any(is_inline_code);
    assert!(has_emphasis, "no emphasis from continuation: {:?}", content);
    assert!(has_code, "no inline code from continuation: {:?}", content);
}

/// Two back-to-back multi-line definitions stay separate.
#[test]
fn multiline_definition_followed_by_another_definition() {
    // Each definition keeps its own continuation line and none of the
    // other's.
    let lexed = parse("[^1]: First definition.\n    Second line of first.\n[^2]: Second definition.\n    Second line of second.");
    let found = defs_of(&lexed);
    assert_eq!(found.len(), 2);
    let (first_label, first_body) = &found[0];
    let (second_label, second_body) = &found[1];
    assert_eq!(first_label, "1");
    assert_eq!(second_label, "2");
    assert!(first_body.contains("Second line of first."));
    assert!(second_body.contains("Second line of second."));
    let leaked = first_body.contains("Second line of second");
    assert!(
        !leaked,
        "definitions leaked content between each other: {:?}",
        first_body
    );
}

/// A plain single-line definition has exactly its own text as the body.
#[test]
fn singleline_definition_still_works() {
    // Sanity check that the single-line behavior is unchanged.
    let lexed = parse("[^1]: Just one line.");
    let found = defs_of(&lexed);
    assert_eq!(found.len(), 1);
    let (_, body) = &found[0];
    assert_eq!(body, "Just one line.");
}

/// A paragraph after a multi-line definition and a blank line is kept
/// in the token stream.
#[test]
fn multiline_definition_does_not_consume_following_unindented_paragraph() {
    // Regression guard: text following a footnote definition must stay
    // a regular paragraph instead of being absorbed.
    let lexed = parse("[^1]: A footnote.\n    Continued.\n\nNew paragraph after blank line.");
    let found = defs_of(&lexed);
    assert_eq!(found.len(), 1);
    let body = &found[0].1;
    assert!(body.contains("A footnote."));
    assert!(body.contains("Continued."));
    let all_text = Token::collect_all_text(&lexed);
    let paragraph_kept = all_text.contains("New paragraph after blank line.");
    assert!(
        paragraph_kept,
        "following paragraph was swallowed: {:?}",
        all_text
    );
}