flowmark 0.3.1

A Markdown auto-formatter for clean diffs and semantic line breaks
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
//! Tag handling for Jinja/Markdoc tags and HTML comments.
//!
//! Ported from Python: `flowmark/linewrapping/tag_handling.py`

use regex::Regex;
use std::sync::LazyLock;

use crate::wrapping::LineWrapper;
use crate::wrapping::atomic_patterns::{
    SINGLE_HTML_COMMENT, SINGLE_JINJA_COMMENT, SINGLE_JINJA_TAG, SINGLE_JINJA_VAR,
};
use crate::wrapping::block_heuristics::{
    line_is_block_content, line_is_list_item, line_is_table_row, normalize_table_separator,
};

/// Regex matching one complete template construct: a Jinja tag, Jinja comment,
/// Jinja variable, or HTML comment (used to protect content inside tags).
///
/// The four component patterns are combined as alternatives, tried in the
/// order listed.
pub(crate) static TEMPLATE_TAG_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
    let alternation = [
        SINGLE_JINJA_TAG.pattern,
        SINGLE_JINJA_COMMENT.pattern,
        SINGLE_JINJA_VAR.pattern,
        SINGLE_HTML_COMMENT.pattern,
    ]
    .join("|");

    // The `(?s)` flag makes any `.` in the component patterns match `\n` too.
    let mut source = String::from("(?s)");
    source.push_str(&alternation);

    Regex::new(&source).expect("valid TEMPLATE_TAG_PATTERN regex")
});

/// Pattern to detect adjacent tags (closing tag immediately followed by opening tag).
///
/// The regex is an alternation with one branch per tag kind (Jinja tag, Jinja
/// comment, Jinja variable, HTML comment). Each branch puts that kind's
/// `close_re` and `open_re` in two back-to-back capture groups with nothing
/// between them, so a match is a closer directly followed by an opener of the
/// *same* kind. `normalize_adjacent_tags` reads the groups as consecutive
/// (close, open) pairs.
///
/// NOTE(review): the pair-wise group numbering assumes `close_re`/`open_re`
/// contain no capturing groups of their own — confirm in `atomic_patterns`.
static ADJACENT_TAGS_RE: LazyLock<Regex> = LazyLock::new(|| {
    // Branch order: Jinja tag | Jinja comment | Jinja variable | HTML comment.
    let pattern = format!(
        "({close_jt})({open_jt})|({close_jc})({open_jc})|({close_jv})({open_jv})|({close_hc})({open_hc})",
        close_jt = SINGLE_JINJA_TAG.close_re,
        open_jt = SINGLE_JINJA_TAG.open_re,
        close_jc = SINGLE_JINJA_COMMENT.close_re,
        open_jc = SINGLE_JINJA_COMMENT.open_re,
        close_jv = SINGLE_JINJA_VAR.close_re,
        open_jv = SINGLE_JINJA_VAR.open_re,
        close_hc = SINGLE_HTML_COMMENT.close_re,
        open_hc = SINGLE_HTML_COMMENT.open_re,
    );
    // Compiled once on first use; failure means the component patterns are malformed.
    Regex::new(&pattern).expect("valid ADJACENT_TAGS_RE regex")
});

/// Pattern to remove spaces between adjacent tags.
///
/// Same shape as the adjacent-tags pattern (one branch per tag kind, each with
/// a `close_re` group followed by an `open_re` group), except that exactly one
/// literal space must sit between the two groups. `denormalize_adjacent_tags`
/// uses the groups to glue the closer and opener back together.
///
/// NOTE(review): the pair-wise group numbering assumes `close_re`/`open_re`
/// contain no capturing groups of their own — confirm in `atomic_patterns`.
static DENORMALIZE_TAGS_RE: LazyLock<Regex> = LazyLock::new(|| {
    // Branch order: Jinja tag | Jinja comment | Jinja variable | HTML comment.
    let pattern = format!(
        "({close_jt}) ({open_jt})|({close_jc}) ({open_jc})|({close_jv}) ({open_jv})|({close_hc}) ({open_hc})",
        close_jt = SINGLE_JINJA_TAG.close_re,
        open_jt = SINGLE_JINJA_TAG.open_re,
        close_jc = SINGLE_JINJA_COMMENT.close_re,
        open_jc = SINGLE_JINJA_COMMENT.open_re,
        close_jv = SINGLE_JINJA_VAR.close_re,
        open_jv = SINGLE_JINJA_VAR.open_re,
        close_hc = SINGLE_HTML_COMMENT.close_re,
        open_hc = SINGLE_HTML_COMMENT.open_re,
    );
    // Compiled once on first use; failure means the component patterns are malformed.
    Regex::new(&pattern).expect("valid DENORMALIZE_TAGS_RE regex")
});

/// Pattern for detecting multiline closing tags.
///
/// One branch per tag kind. Each branch matches that kind's `close_re`,
/// optional whitespace, and then a named group holding the kind's `open_re`,
/// optional whitespace, and a `/`. In other words: the end of one tag followed
/// by the start of a closing tag (`/` right after the opener).
///
/// The named groups (`closing_tag`, `closing_comment`, `closing_var`,
/// `closing_html`) start at the opening delimiter of the closing tag, which is
/// where `fix_multiline_opening_tag_with_closing` splits the line.
static MULTILINE_CLOSING_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
    // Branch order: Jinja tag | Jinja comment | Jinja variable | HTML comment.
    let pattern = format!(
        r"{close_jt}\s*(?P<closing_tag>{open_jt}\s*/)|{close_jc}\s*(?P<closing_comment>{open_jc}\s*/)|{close_jv}\s*(?P<closing_var>{open_jv}\s*/)|{close_hc}\s*(?P<closing_html>{open_hc}\s*/)",
        close_jt = SINGLE_JINJA_TAG.close_re,
        open_jt = SINGLE_JINJA_TAG.open_re,
        close_jc = SINGLE_JINJA_COMMENT.close_re,
        open_jc = SINGLE_JINJA_COMMENT.open_re,
        close_jv = SINGLE_JINJA_VAR.close_re,
        open_jv = SINGLE_JINJA_VAR.open_re,
        close_hc = SINGLE_HTML_COMMENT.close_re,
        open_hc = SINGLE_HTML_COMMENT.open_re,
    );
    // Compiled once on first use; failure means the component patterns are malformed.
    Regex::new(&pattern).expect("valid MULTILINE_CLOSING_PATTERN regex")
});

/// Add a space between adjacent tags so they become separate tokens.
///
/// Every match of `ADJACENT_TAGS_RE` (a closing delimiter directly followed by
/// an opening delimiter) is rewritten as `<close> <open>`. Text outside the
/// matches is copied through unchanged.
pub fn normalize_adjacent_tags(text: &str) -> String {
    ADJACENT_TAGS_RE
        .replace_all(text, |caps: &regex::Captures<'_>| {
            // Capture groups come in (close, open) pairs: (1, 2), (3, 4), ...
            // `caps.len()` also counts group 0, so valid group indices stop at
            // `caps.len() - 1`. Use the first pair that took part in the match.
            (1..caps.len())
                .step_by(2)
                .find_map(|i| Some((caps.get(i)?, caps.get(i + 1)?)))
                .map_or_else(
                    // Defensive fallback: no complete pair matched, keep the text as-is.
                    || caps[0].to_string(),
                    |(close, open)| format!("{} {}", close.as_str(), open.as_str()),
                )
        })
        .into_owned()
}

/// Remove spaces between adjacent tags that were added during word splitting.
///
/// Every match of `DENORMALIZE_TAGS_RE` (a closing delimiter, one space, an
/// opening delimiter) is rewritten as `<close><open>`. Text outside the
/// matches is copied through unchanged.
pub fn denormalize_adjacent_tags(text: &str) -> String {
    DENORMALIZE_TAGS_RE
        .replace_all(text, |caps: &regex::Captures<'_>| {
            // Capture groups come in (close, open) pairs: (1, 2), (3, 4), ...
            // `caps.len()` also counts group 0, so valid group indices stop at
            // `caps.len() - 1`. Use the first pair that took part in the match.
            (1..caps.len())
                .step_by(2)
                .find_map(|i| Some((caps.get(i)?, caps.get(i + 1)?)))
                .map_or_else(
                    // Defensive fallback: no complete pair matched, keep the text as-is.
                    || caps[0].to_string(),
                    |(close, open)| format!("{}{}", close.as_str(), open.as_str()),
                )
        })
        .into_owned()
}

/// Report whether `line` consists of tag markup only: it is unindented, begins
/// with one of the tag opening delimiters, and (ignoring surrounding
/// whitespace) ends with one of the tag closing delimiters.
fn is_tag_only_line(line: &str) -> bool {
    // Leading whitespace marks a continuation line, never a standalone tag block.
    if line.starts_with(char::is_whitespace) {
        return false;
    }

    let content = line.trim();
    if content.is_empty() {
        return false;
    }

    // v0.7.0: HTML comments are included alongside Jinja/Markdoc delimiters.
    // Older versions left them out because comrak already treats them as
    // HtmlBlock type 2, but when a comment sits tight against a list or table
    // the structural boundary is lost unless a blank line is injected. The
    // caller only injects when the *current* line is block content, so a
    // paragraph that follows an HTML comment is not affected.
    let openers = [
        SINGLE_JINJA_TAG.open_delim,
        SINGLE_JINJA_COMMENT.open_delim,
        SINGLE_JINJA_VAR.open_delim,
        SINGLE_HTML_COMMENT.open_delim,
    ];
    let closers = [
        SINGLE_JINJA_TAG.close_delim,
        SINGLE_JINJA_COMMENT.close_delim,
        SINGLE_JINJA_VAR.close_delim,
        SINGLE_HTML_COMMENT.close_delim,
    ];

    openers.iter().any(|&delim| content.starts_with(delim))
        && closers.iter().any(|&delim| content.ends_with(delim))
}

/// COMRAK-WORKAROUND6: insert blank lines so block content (as judged by
/// `line_is_block_content`) is separated from tag-only lines that directly
/// precede or follow it. See `filling.rs` module docs.
///
/// Input without any tag-only line is returned unchanged.
pub fn preprocess_tag_block_spacing(text: &str) -> String {
    let lines: Vec<&str> = text.split('\n').collect();

    // Fast path: nothing to separate when no line is a standalone tag.
    if !lines.iter().any(|line| is_tag_only_line(line)) {
        return text.to_string();
    }

    let mut spaced: Vec<&str> = Vec::with_capacity(lines.len());
    let mut previous: Option<&str> = None;

    for &line in &lines {
        // Only a non-blank predecessor can require a separator.
        if let Some(prev) = previous.filter(|p| !p.trim().is_empty()) {
            // Tag-only line directly followed by block content.
            if is_tag_only_line(prev) && line_is_block_content(line) {
                spaced.push("");
            }
            // Block content directly followed by a tag-only line.
            if line_is_block_content(prev) && is_tag_only_line(line) {
                spaced.push("");
            }
        }

        spaced.push(line);
        previous = Some(line);
    }

    spaced.join("\n")
}

/// Report whether `line`, ignoring trailing whitespace, ends with the closing
/// delimiter of a Jinja/Markdoc tag, comment, variable, or an HTML comment.
pub(crate) fn line_ends_with_tag(line: &str) -> bool {
    let content = line.trim_end();
    let closers = [
        SINGLE_JINJA_TAG.close_delim,
        SINGLE_JINJA_COMMENT.close_delim,
        SINGLE_JINJA_VAR.close_delim,
        SINGLE_HTML_COMMENT.close_delim,
    ];

    // Blank lines never count as ending with a tag.
    !content.is_empty() && closers.iter().any(|&delim| content.ends_with(delim))
}

/// Report whether `line`, ignoring leading whitespace, begins with the opening
/// delimiter of a Jinja/Markdoc tag, comment, variable, or an HTML comment.
pub(crate) fn line_starts_with_tag(line: &str) -> bool {
    let content = line.trim_start();
    let openers = [
        SINGLE_JINJA_TAG.open_delim,
        SINGLE_JINJA_COMMENT.open_delim,
        SINGLE_JINJA_VAR.open_delim,
        SINGLE_HTML_COMMENT.open_delim,
    ];

    // Blank lines never count as starting with a tag.
    !content.is_empty() && openers.iter().any(|&delim| content.starts_with(delim))
}

/// Report whether `line` starts with a tag at column zero, i.e. it is
/// non-empty, has no leading whitespace, and begins with a tag opener.
fn is_unindented_tag_line(line: &str) -> bool {
    let is_indented = line.starts_with(char::is_whitespace);
    !line.is_empty() && !is_indented && line_starts_with_tag(line)
}

/// Report whether `line`, ignoring leading whitespace, begins with a closing
/// tag: an opening delimiter, one space, and a `/`.
fn is_closing_tag(line: &str) -> bool {
    const CLOSING_PREFIXES: [&str; 4] = ["{% /", "{# /", "{{ /", "<!-- /"];

    let content = line.trim_start();
    CLOSING_PREFIXES.iter().any(|&prefix| content.starts_with(prefix))
}

/// Fix closing tag spacing for block content.
///
/// Each closing-tag line has its leading whitespace removed. If the line
/// emitted just before it is non-blank block content, an empty line is
/// inserted between the two. All other lines pass through untouched.
pub fn fix_closing_tag_spacing(text: &str) -> String {
    let mut output: Vec<String> = Vec::new();

    for line in text.split('\n') {
        if !is_closing_tag(line) {
            output.push(line.to_string());
            continue;
        }

        // Look at what was actually emitted last (it may be an inserted blank).
        let needs_gap = output
            .last()
            .is_some_and(|prev| !prev.trim().is_empty() && line_is_block_content(prev));
        if needs_gap {
            output.push(String::new());
        }

        output.push(line.trim_start().to_string());
    }

    output.join("\n")
}

/// Ensure closing tags are on their own line when the opening tag spans multiple lines.
pub fn fix_multiline_opening_tag_with_closing(text: &str) -> String {
    if !text.contains('\n') {
        return text.to_string();
    }

    let lines: Vec<&str> = text.split('\n').collect();
    let mut result_lines: Vec<String> = Vec::new();

    for (i, line) in lines.iter().enumerate() {
        if i == 0 {
            result_lines.push((*line).to_string());
            continue;
        }

        let stripped = line.trim_start();
        let is_tag_start = stripped.starts_with(SINGLE_JINJA_TAG.open_delim)
            || stripped.starts_with(SINGLE_JINJA_COMMENT.open_delim)
            || stripped.starts_with(SINGLE_JINJA_VAR.open_delim)
            || stripped.starts_with(SINGLE_HTML_COMMENT.open_delim);

        if !is_tag_start {
            if let Some(_m) = MULTILINE_CLOSING_PATTERN.find(line) {
                // Find which named group matched
                let caps = MULTILINE_CLOSING_PATTERN
                    .captures(line)
                    .expect("captures must succeed after find");
                let mut found = false;
                for group_name in &["closing_tag", "closing_comment", "closing_var", "closing_html"]
                {
                    if caps.name(group_name).is_some() {
                        let split_pos = caps
                            .name(group_name)
                            .expect("named group must exist after is_some check")
                            .start();
                        let before = line[..split_pos].trim_end();
                        let closing = line[split_pos..].trim_start();
                        result_lines.push(before.to_string());
                        result_lines.push(closing.to_string());
                        found = true;
                        break;
                    }
                }
                if found {
                    continue;
                }
            }
        }

        result_lines.push((*line).to_string());
    }

    result_lines.join("\n")
}

/// Augments a `LineWrapper` to preserve newlines around Jinja/Markdoc tags
/// and HTML comments.
#[allow(clippy::type_complexity)]
pub(crate) fn add_tag_newline_handling(
    base_wrapper: Box<dyn Fn(&str, &str, &str) -> String + Send + Sync>,
) -> LineWrapper {
    Box::new(move |text: &str, initial_indent: &str, subsequent_indent: &str| -> String {
        // If no newlines in input, just wrap and apply post-processing fixes.
        if !text.contains('\n') {
            let result = base_wrapper(text, initial_indent, subsequent_indent);
            return fix_multiline_opening_tag_with_closing(&result);
        }

        let lines: Vec<&str> = text.split('\n').collect();

        if lines.len() <= 1 {
            let result = base_wrapper(text, initial_indent, subsequent_indent);
            return fix_multiline_opening_tag_with_closing(&result);
        }

        // Check if there are any tags in the text
        let has_tags =
            lines.iter().any(|line| line_ends_with_tag(line) || line_starts_with_tag(line));

        // Group lines into segments
        let mut segments: Vec<String> = Vec::new();
        let mut current_segment_lines: Vec<&str> = Vec::new();

        for (i, line) in lines.iter().enumerate() {
            let is_first_line = i == 0;
            let prev_ends_with_tag = !is_first_line && line_ends_with_tag(lines[i - 1]);
            let curr_starts_with_tag = is_unindented_tag_line(line);
            // Table rows are ALWAYS structural boundaries (never line-wrapped),
            // matching Python. List items are boundaries only when tags are present.
            let curr_is_table = line_is_table_row(line);
            let prev_is_table = !is_first_line && line_is_table_row(lines[i - 1]);
            let curr_is_block = curr_is_table || (has_tags && line_is_list_item(line));
            let prev_is_block =
                prev_is_table || (has_tags && !is_first_line && line_is_list_item(lines[i - 1]));

            if (prev_ends_with_tag || curr_starts_with_tag || curr_is_block || prev_is_block)
                && !current_segment_lines.is_empty()
            {
                segments.push(current_segment_lines.join("\n"));
                current_segment_lines.clear();
            }

            current_segment_lines.push(line);
        }

        if !current_segment_lines.is_empty() {
            segments.push(current_segment_lines.join("\n"));
        }

        if segments.len() == 1 {
            let result = base_wrapper(text, initial_indent, subsequent_indent);
            return fix_multiline_opening_tag_with_closing(&result);
        }

        // Wrap each segment separately. Segments made entirely of table rows are
        // structural markdown and are preserved verbatim (never wrapped), with
        // separator rows normalized to three dashes — matching Python.
        let mut wrapped_segments: Vec<String> = Vec::new();
        for (i, segment) in segments.iter().enumerate() {
            let is_first = i == 0;
            let cur_initial_indent = if is_first { initial_indent } else { subsequent_indent };
            let segment_lines: Vec<&str> = segment.split('\n').collect();
            let all_table_rows =
                segment_lines.iter().filter(|l| !l.trim().is_empty()).all(|l| line_is_table_row(l));
            let wrapped = if all_table_rows {
                segment_lines
                    .iter()
                    .enumerate()
                    .map(|(j, line)| {
                        let indent = if j == 0 { cur_initial_indent } else { subsequent_indent };
                        if line.trim().is_empty() {
                            (*line).to_string()
                        } else {
                            format!("{indent}{}", normalize_table_separator(line))
                        }
                    })
                    .collect::<Vec<_>>()
                    .join("\n")
            } else {
                base_wrapper(segment, cur_initial_indent, subsequent_indent)
            };
            wrapped_segments.push(wrapped);
        }

        // Rejoin segments, normalizing newlines around block content
        let mut result_parts: Vec<String> = Vec::new();
        for (i, wrapped) in wrapped_segments.iter().enumerate() {
            if i == 0 {
                result_parts.push(wrapped.clone());
                continue;
            }

            let prev_segment = &segments[i - 1];
            let curr_segment = &segments[i];

            let prev_is_block = prev_segment.split('\n').any(line_is_block_content);
            let curr_is_block = curr_segment.split('\n').any(line_is_block_content);

            let prev_last_line = prev_segment.split('\n').next_back().unwrap_or("");
            let curr_first_line = curr_segment.split('\n').next().unwrap_or("");

            let prev_is_tag = line_ends_with_tag(prev_last_line);
            let curr_is_tag = is_unindented_tag_line(curr_first_line);

            if (prev_is_tag && curr_is_block) || (prev_is_block && curr_is_tag) {
                result_parts.push(String::new());
                result_parts.push(wrapped.clone());
            } else {
                result_parts.push(wrapped.clone());
            }
        }

        let result = result_parts.join("\n");

        // Post-process
        let result = fix_closing_tag_spacing(&result);
        fix_multiline_opening_tag_with_closing(&result)
    })
}