spandex_hyphenation/
iter.rs

1/*!
2Hyphenating iterators over strings.
3*/
4
5use std::borrow::Cow;
6use std::iter::{Cloned, IntoIterator, ExactSizeIterator};
7use std::slice;
8use std::vec;
9
10use hyphenator::*;
11use extended::*;
12
13
/// An iterator adaptor that appends a mark (a hyphen, unless changed with
/// [`mark_with`](Hyphenating::mark_with)) to every string segment of the
/// underlying iterator except the last one.
///
/// When the underlying iterator yields the pieces of a word split at its
/// word breaks, those pieces generally coincide with orthographic syllables,
/// albeit within the limited accuracy of Knuth-Liang hyphenation.
#[derive(Clone, Debug)]
pub struct Hyphenating<'m, I> {
    inner : I,
    mark : &'m str
}

impl<'m, I, S> Hyphenating<'m, I>
where I : Iterator<Item = S>
    , S : AsRef<str>
{
    /// Wrap an iterator over string segments; the mark starts out as `"-"`.
    pub fn new(iter : I) -> Self {
        Hyphenating { inner : iter, mark : "-" }
    }

    /// Replace the mark that is appended to each segment preceding a break.
    pub fn mark_with(&mut self, mark : &'m str) {
        self.mark = mark;
    }

    /// Give up the mark and return the underlying iterator, which yields the
    /// bare word segments with nothing inserted before breaks.
    pub fn segments(self) -> I {
        let Hyphenating { inner, .. } = self;
        inner
    }
}

impl<'m, I, S> Iterator for Hyphenating<'m, I>
where I : Iterator<Item = S> + ExactSizeIterator
    , S : AsRef<str>
{
    type Item = String;

    /// Yield the next segment as an owned `String`, followed by the mark
    /// unless it is the final segment.
    fn next(&mut self) -> Option<Self::Item> {
        let segment = self.inner.next()?;
        let piece : &str = segment.as_ref();

        let mut marked = String::from(piece);
        // Segments still pending in `inner` mean a break follows this one.
        if self.inner.len() != 0 {
            marked.push_str(self.mark);
        }
        Some(marked)
    }

    /// One item is produced per inner segment, so the hint is forwarded.
    fn size_hint(&self) -> (usize, Option<usize>) {
        self.inner.size_hint()
    }
}

// `size_hint` is forwarded from an exact-size iterator, so it stays exact.
impl<'m, I, S> ExactSizeIterator for Hyphenating<'m, I>
where I : Iterator<Item = S> + ExactSizeIterator
    , S : AsRef<str> {}
64
65
/// A hyphenating iterator with borrowed data.
///
/// Implementors hand out a [`Hyphenating`] iterator that borrows `self` for
/// the lifetime `'t` rather than consuming it.
pub trait Iter<'t> {
    /// The segment iterator wrapped by the returned [`Hyphenating`].
    type Iter;

    /// Borrow `self` for `'t` and return a hyphenating iterator over it.
    fn iter(&'t self) -> Hyphenating<'t, Self::Iter>;
}
72
73impl<'t> Iter<'t> for Word<'t, usize> {
74    type Iter = Segments<'t, Cloned<slice::Iter<'t, usize>>>;
75
76    fn iter(&'t self) -> Hyphenating<'t, Self::Iter> {
77        Hyphenating::new(Segments::new(self.text, self.breaks.iter().cloned()))
78    }
79}
80
81impl<'t> IntoIterator for Word<'t, usize> {
82    type Item = String;
83    type IntoIter = Hyphenating<'t, Segments<'t, vec::IntoIter<usize>>>;
84
85    fn into_iter(self) -> Self::IntoIter {
86        Hyphenating::new(Segments::new(self.text, self.breaks.into_iter()))
87    }
88}
89
90impl<'t> IntoIterator for Word<'t, (usize, Option<&'t Subregion>)> {
91    type Item = String;
92    type IntoIter = Hyphenating<'t,
93        SegmentsExt<'t, vec::IntoIter<(usize, Option<&'t Subregion>)>>
94    >;
95
96    fn into_iter(self) -> Self::IntoIter {
97        Hyphenating::new(SegmentsExt::new(self.text, self.breaks.into_iter()))
98    }
99}
100
101
/// An iterator over borrowed slices delimited by Standard hyphenation
/// opportunities.
///
/// `breaks` yields the byte indices at which `text` is to be split. For `n`
/// breaks the iterator yields `n + 1` slices: the text before the first
/// break, the text between consecutive breaks, and the text after the last.
///
/// # Panics
///
/// Iteration panics if a break index does not lie on a `char` boundary of
/// `text`, exceeds its length, or precedes the previous break.
#[derive(Clone, Debug)]
pub struct Segments<'t, I> {
    text : &'t str,
    breaks : I,
    // Byte offset at which the next segment begins; `None` once the final
    // segment has been yielded.
    start : Option<usize>
}

impl<'t, I> Segments<'t, I> {
    /// Build a segment iterator over `text`, to be split at `breaks`.
    pub fn new(text : &'t str, breaks : I) -> Self {
        Segments {
            text,
            breaks,
            start : Some(0)
        }
    }
}

impl<'t, I> Iterator for Segments<'t, I> where I : Iterator<Item = usize> {
    type Item = &'t str;

    /// Yield the slice up to the next break, or the remaining tail once the
    /// breaks are exhausted.
    #[inline]
    fn next(&mut self) -> Option<Self::Item> {
        let text = self.text;
        match self.breaks.next() {
            Some(end) => {
                // `start` is only `None` after the tail has been yielded. A
                // non-fused `breaks` could still produce an index at that
                // point; stay finished instead of panicking.
                let start = self.start?;
                self.start = Some(end);
                Some(&text[start .. end])
            },
            None => self.start.take().map(|start| &text[start ..])
        }
    }

    /// One segment per remaining break, plus the trailing segment if it has
    /// not been yielded yet.
    #[inline]
    fn size_hint(&self) -> (usize, Option<usize>) {
        let (lower, upper) = self.breaks.size_hint();
        let stagger = if self.start.is_some() { 1 } else { 0 };
        // The inner hint may already be `usize::MAX` (e.g. an unbounded
        // iterator), so the extra segment must not overflow: saturate the
        // lower bound and report an unknown upper bound.
        (lower.saturating_add(stagger), upper.and_then(|n| n.checked_add(stagger)))
    }
}

// With an exact number of remaining breaks, `size_hint` is exact as well.
impl<'t, I> ExactSizeIterator for Segments<'t, I>
where I : Iterator<Item = usize> + ExactSizeIterator {}
148
149
150/// An iterator over string segments delimited by Extended hyphenation
151/// opportunities. A segment may be borrowed or owned, depending on whether
152/// the break requires changes to neighboring letters.
153#[derive(Clone, Debug)]
154pub struct SegmentsExt<'t, I> {
155    text : &'t str,
156    breaks : I,
157    start : Option<usize>,
158    queued : Option<(usize, &'t str)>
159}
160
161impl<'t, I> SegmentsExt<'t, I> {
162    pub fn new(text : &'t str, breaks : I) -> Self {
163        SegmentsExt {
164            text,
165            breaks,
166            start : Some(0),
167            queued : None
168        }
169    }
170
171    fn substitute(&mut self, text : &'t str) -> Cow<'t, str> {
172        match self.queued.take() {
173            None => Cow::Borrowed(text),
174            Some((skip, ref subst)) => Cow::Owned([subst, &text[skip ..]].concat())
175        }
176    }
177}
178
179
180
181impl<'t, I>  Iterator for SegmentsExt<'t, I>
182where I : Iterator<Item = (usize, Option<&'t Subregion>)> {
183    type Item = Cow<'t, str>;
184
185    fn next(&mut self) -> Option<Self::Item> {
186        match self.breaks.next() {
187            None => self.start.take().map(|start| self.substitute(&self.text[start ..])),
188            Some((index, None)) => {
189                let start = self.start.unwrap();
190                self.start = Some(index);
191                Some(self.substitute(&self.text[start .. index]))
192            },
193            Some((index, Some(ref subr))) => {
194                let (start, end) = (self.start.unwrap(), index);
195                self.start = Some(index);
196
197                let (segment_start, fore) = self.queued.take().unwrap_or((start, ""));
198                let (segment_end, aft) = {
199                    let (subst, queued) = subr.substitution.split_at(subr.breakpoint);
200                    if queued.len() > 0 {
201                        self.queued = Some((subr.right, queued));
202                    }
203                    (end - subr.left, subst)
204                };
205
206                let segment = [fore, &self.text[segment_start .. segment_end], aft].concat();
207                Some(Cow::Owned(segment))
208            }
209        }
210    }
211
212    #[inline]
213    fn size_hint(&self) -> (usize, Option<usize>) {
214        let (lower, upper) = self.breaks.size_hint();
215        let stagger = self.start.iter().len();
216        (lower + stagger, upper.map(|n| n + stagger))
217    }
218}
219
220impl<'t, I> ExactSizeIterator for SegmentsExt<'t, I>
221where I : Iterator<Item = (usize, Option<&'t Subregion>)>
222        + ExactSizeIterator {}