1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
use std::ops::Range;
use textwrap::{unfill, fill, refill};
use textwrap::Options as TwOptions;
use textwrap::wrap_algorithms::FirstFit;
use textwrap::word_separators::AsciiSpace;
use textwrap::word_splitters::NoHyphenation;
use pulldown_cmark::{Event, Parser, Tag};
use pulldown_cmark::Options as CmarkOptions;
use partial_application::partial;


// Interface functions:

/// Wrap every Markdown paragraph found in `raw` to a width of `new_width`.
///
/// Paragraphs are located by `zip()`, which hands each one to textwrap's `fill` configured by
/// `opts(new_width)`; everything in `raw` that is not a paragraph is copied through unchanged.
/// Returns the reassembled document as a new `String`.
pub fn wrap(raw: &str, new_width: usize) -> String {
    // `partial!` leaves the first argument of `fill` (the paragraph text) open for zip() to supply.
    zip(raw, partial!(fill => _, opts(new_width)))
}
/// Re-wrap every Markdown paragraph found in `raw` to a width of `new_width`.
///
/// Same shape as `wrap()`, except that each paragraph goes through textwrap's `refill` rather
/// than `fill`, so text that already carries line breaks is re-flowed to the new width.
/// Content outside paragraphs is copied through unchanged. Returns a new `String`.
pub fn rewrap(raw: &str, new_width: usize) -> String {
    // `partial!` leaves the first argument of `refill` (the paragraph text) open for zip() to supply.
    zip(raw, partial!(refill => _, opts(new_width)))
}
/// Undo the wrapping of every Markdown paragraph found in `raw`.
///
/// Each paragraph located by `zip()` is passed to `unwrap_prefixed()`, which unfills it via
/// textwrap and puts the detected initial indentation back in front. Content outside paragraphs
/// is copied through unchanged. Returns a new `String`.
pub fn unwrap(raw: &str) -> String {
    // No arguments are fixed here; `partial!` only adapts `unwrap_prefixed` to the callable
    // shape that zip() expects.
    zip(raw, partial!(unwrap_prefixed => _))
}


// Internal functions:

/// Build the textwrap options shared by the wrapping entry points.
///
/// Only `width` varies. Every other setting is pinned to its plainest choice (no indentation,
/// `break_words` off, the `FirstFit` wrap algorithm, the `AsciiSpace` word separator and the
/// `NoHyphenation` word splitter) because the aim is output that a program can undo again, not
/// output that reads or looks best.
fn opts<'a>(width: usize) -> TwOptions<'a> {
    // TODO: Expose the configuration interface of textwrap more fully, not just width.
    // TODO: Memoization for performance?
    // TODO: Context-sensitive indentation; cf. https://github.com/mgeisler/textwrap/issues/224.

    // Neither the first line nor any following line receives a prefix.
    const NO_INDENT: &str = "";

    TwOptions {
        word_splitter: Box::new(NoHyphenation),
        word_separator: Box::new(AsciiSpace),
        wrap_algorithm: Box::new(FirstFit),
        break_words: false,
        subsequent_indent: NO_INDENT,
        initial_indent: NO_INDENT,
        width,
    }
}

/// Filter predicate for the offset iterator: true only for the event that opens a paragraph.
/// The byte range travelling alongside the event plays no part in the decision.
fn pred(item: &(Event, Range<usize>)) -> bool {
    matches!(item.0, Event::Start(Tag::Paragraph))
}
/// Reduce an `(event, range)` pair to just its byte range.
/// Meant to run after pred() has filtered the stream, once the event itself is no longer needed.
fn pick(item: (Event, Range<usize>)) -> Range<usize> {
    let (_, span) = item;
    span
}

/// Join together modified paragraphs and other content.
/// This uses a pulldown-cmark event stream and a closure acting upon each paragraph.
fn zip<F: Fn(&str) -> String>(raw: &str, pfn: F) -> String {
    // “pranges” is an iterable of Ranges describing the beginning and ending of every paragraph of
    // text in the original document.
    let pranges = Parser::new_ext(raw, CmarkOptions::empty()).into_offset_iter().filter(pred).map(pick);

    // Combine untouched and retouched strings like a zip fastener.
    let mut new = String::new();
    let mut lastoffset: usize = 0;
    for range in pranges {
        if lastoffset < range.start {
            new.push_str(&raw[lastoffset..range.start]);
        }
        new.push_str(&pfn(&raw[range.start..range.end]));
        lastoffset = range.end;
    }
    if lastoffset < raw.len() {
        // Retain whatever is left after the final paragraph.
        new.push_str(&raw[lastoffset..raw.len()]);
    }
    return new;
}

/// Unfill a paragraph while keeping its leading indentation.
///
/// textwrap tends to treat non-alphanumeric leading characters (comment syntax, for example) as
/// indentation and strips them when unfilling. This workaround puts the detected
/// `initial_indent` back in front of the unfilled text. Whatever textwrap detected as
/// `subsequent_indent` is dropped silently.
fn unwrap_prefixed(raw: &str) -> String {
    let (body, detected) = unfill(raw);

    let mut joined = String::with_capacity(detected.initial_indent.len() + body.len());
    joined.push_str(detected.initial_indent);
    joined.push_str(&body);
    joined
}