1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
use super::chunk::Chunk;
use super::segment::Segment;
use crate::utils::span::SpannedText;
use std::rc::Rc;
use unicode_segmentation::UnicodeSegmentation as _;
use unicode_width::UnicodeWidthStr as _;
use xi_unicode::LineBreakLeafIter;

/// Iterator that returns non-breakable chunks of text.
///
/// Works across spans of text: the iterator walks the spans of `source` in
/// order and keeps track of how far into the current span it has progressed.
pub struct ChunkIterator<S> {
    /// Input that we want to chunk.
    ///
    /// Shared through an `Rc`, so the iterator does not own the text
    /// exclusively.
    source: Rc<S>,

    /// ID of the span we are processing.
    ///
    /// This is an index into the spans of `source`.
    current_span: usize,

    /// How much of the current span has been processed already.
    ///
    /// Expressed as a byte offset into the text of the current span.
    offset: usize,
}

impl<S> ChunkIterator<S> {
    /// Creates a new ChunkIterator on the given styled string.
    pub fn new(source: Rc<S>) -> Self {
        ChunkIterator {
            source,
            current_span: 0,
            offset: 0,
        }
    }
}

/// This iterator produces chunks of non-breakable text.
///
/// These chunks may go across spans (a single word may be broken into more
/// than one span, for instance if parts of it are marked up differently).
impl<S> Iterator for ChunkIterator<S>
where
    S: SpannedText,
{
    type Item = Chunk;

    /// Returns the next non-breakable chunk, or `None` once every span has
    /// been consumed.
    ///
    /// A chunk is a list of segments (each one a byte range inside a single
    /// span) together with its total display width, whether it ends on a
    /// hard stop, and whether it ends with a space.
    ///
    /// The character that caused a hard stop (for instance `\n`) is excluded
    /// from the returned segments.
    ///
    /// # Panics
    ///
    /// Panics if the line-break iterator reports a break at position 0 before
    /// any segment was collected, or a hard break exactly at the end of a
    /// span.
    fn next(&mut self) -> Option<Self::Item> {
        // Skip empty spans, and stop when we processed all spans.
        //
        // This is a loop rather than a recursive call so that a long run of
        // empty spans cannot grow the stack.
        loop {
            if self.current_span >= self.source.spans().len() {
                return None;
            }

            if !self.source.spans()[self.current_span].as_ref().is_empty() {
                break;
            }

            self.current_span += 1;
        }

        // Current span & associated text
        let mut span = self.source.spans()[self.current_span].as_ref();
        let mut span_text = span.resolve(self.source.source());

        let mut total_width = 0;

        // We'll accumulate segments from spans.
        let mut segments: Vec<Segment> = Vec::new();

        // We'll use an iterator from xi-unicode to detect possible breaks.
        let mut iter = LineBreakLeafIter::new(span_text, self.offset);

        // When we reach the end of a span, xi-unicode returns a break, but it
        // actually depends on the next span. Such breaks are "fake" breaks.
        //
        // So we'll loop until we find a "true" break
        // (a break that doesn't happen at the end of a span).
        // Note that if a break is a "hard" stop, then it is always a "true" break.
        //
        // Most of the time, it will happen on the first iteration.
        loop {
            // Look at next possible break
            // `hard_stop = true` means that the break is non-optional,
            // like after a `\n`.
            let (pos, hard_stop) = iter.next(span_text);

            // When xi-unicode reaches the end of a span, it returns a "fake"
            // break. To know if it's actually a true break, we need to give
            // it the next span. If, given the next span, it returns a break
            // at position 0, then the previous one was a true break.
            // So when pos = 0, we don't really have a new segment, but we
            // can end the current chunk.

            let ends_with_space = if pos == 0 {
                // If pos = 0, we had a span before, and we just learned it
                // was a possible break.
                assert!(
                    self.current_span > 0,
                    "Cannot receive pos == 0 for the first span."
                );

                // The break belongs to the last segment we collected, so
                // there must be one. Check this before touching it, so the
                // failure carries a meaningful message.
                let last_segment = segments
                    .last_mut()
                    .expect("Cannot receive pos == 0 in the first segment.");

                // Going back through the segment (rather than through
                // `current_span - 1`) naturally ignores skipped empty spans.
                let prev_span =
                    self.source.spans()[last_segment.span_id].as_ref();
                let prev_text = prev_span.resolve(self.source.source());

                if hard_stop {
                    // We didn't know it was a hard-stop at the time.
                    // But now we do, so let's omit the last character from
                    // that segment.
                    last_segment.end -= trailing_grapheme_len(prev_text);
                }

                prev_text.ends_with(' ')
            } else {
                // We actually got a segment of a span.
                // Remember its width, and whether it ends with a space.
                //
                // (When a chunk ends with a space, we may compress it a bit
                // near the end of a row, so this information will be useful
                // later.)
                let text = &span_text[self.offset..pos];
                let width = text.width();

                // On a hard stop, leave the breaking character itself out of
                // the segment.
                let to_remove = if hard_stop {
                    trailing_grapheme_len(text)
                } else {
                    0
                };

                total_width += width;
                segments.push(Segment {
                    span_id: self.current_span,
                    start: self.offset,
                    end: pos - to_remove,
                    width,
                });

                text.ends_with(' ')
            };

            if pos == span_text.len() {
                assert!(
                    !hard_stop,
                    "Cannot have hard-break at the end of a span."
                );
                // ... or can we?

                // If we reached the end of the slice,
                // we need to look at the next span first.
                self.current_span += 1;

                // Skip empty spans
                while let Some(true) =
                    self.source.spans().get(self.current_span).map(|span| {
                        span.as_ref().resolve(self.source.source()).is_empty()
                    })
                {
                    self.current_span += 1;
                }

                if self.current_span >= self.source.spans().len() {
                    // If this was the last chunk, return as is!
                    // Well, make sure we don't end with a newline...
                    if span_text.ends_with('\n') {
                        // This is basically a hard-stop here.
                        // Trim the whole trailing grapheme, like the other
                        // hard-stop paths do, so that "\r\n" does not leave
                        // a stray '\r' behind.
                        //
                        // `self.offset..pos` is still the segment we just
                        // pushed, since spans are never empty here.
                        let to_remove =
                            trailing_grapheme_len(&span_text[self.offset..pos]);
                        segments
                            .last_mut()
                            .expect("a segment was just pushed for this span")
                            .end -= to_remove;
                    }

                    return Some(Chunk {
                        width: total_width,
                        segments,
                        hard_stop,
                        ends_with_space,
                    });
                }

                span = self.source.spans()[self.current_span].as_ref();
                span_text = span.resolve(self.source.source());
                self.offset = 0;
                continue;
            }

            // Remember where we are.
            self.offset = pos;

            // We found a valid stop, return the current chunk.
            return Some(Chunk {
                width: total_width,
                segments,
                hard_stop,
                ends_with_space,
            });
        }
    }
}

/// Returns the length in bytes of the last grapheme cluster of `text`,
/// or 0 if `text` is empty.
fn trailing_grapheme_len(text: &str) -> usize {
    text.graphemes(true).next_back().map_or(0, str::len)
}