ruby_string/
string.rs

1use crate::*;
2use std::iter::FromIterator;
3
///Describes where one ruby gloss is attached: a byte range in the packed main text paired with
///the byte range of the gloss inside the packed ruby string.
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
pub(crate) struct Placement {
    //NOTE: every index in this struct is a byte offset

    ///Offset in the main text at which the glossed substring begins.
    pub(crate) text_start: usize,
    ///Offset in the main text just past the last byte of the glossed substring.
    pub(crate) text_end: usize,
    ///Offset in the ruby string at which the gloss begins.
    pub(crate) ruby_start: usize,
    ///Offset in the ruby string just past the last byte of the gloss.
    //TODO: this field is redundant, we could remove it and use the ruby_start field of the next
    //Placement in order instead
    pub(crate) ruby_end: usize,
}
14
15///A string type that can have [ruby glosses](https://en.wikipedia.org/wiki/Ruby_character)
16///attached to parts of it.
17///
18///## Memory layout
19///
20///The text content is stored in two String instances, one being the main text and one being a
21///concatenation of all the rubies. Placement of the rubies is stored as a list of indices into
22///both strings. Compared to the trivial structure (where each rubied substring is held as a
23///separate String), this layout reduces memory usage and the number of separate allocations at the
24///expense of slightly more complicated indexing logic.
25#[derive(Clone, PartialEq, Eq)]
26pub struct RubyString {
27    pub(crate) packed_text: String,
28    pub(crate) packed_ruby: String,
29    pub(crate) placements: Vec<Placement>,
30}
31
32impl RubyString {
33    ///Creates a new empty `RubyString`.
34    pub fn new() -> RubyString {
35        RubyString {
36            packed_text: String::new(),
37            packed_ruby: String::new(),
38            placements: Vec::new(),
39        }
40    }
41
42    ///Appends plain text (that does not have a ruby gloss attached to it) to this `RubyString`.
43    pub fn push_str(&mut self, string: &str) {
44        self.packed_text.push_str(string);
45    }
46
47    ///Appends text to this `RubyString` and attaches a ruby gloss to it.
48    pub fn push_segment<'a>(&mut self, segment: Segment<'a>) {
49        match segment {
50            Segment::Plain { text } => {
51                self.packed_text.push_str(text);
52            }
53            Segment::Rubied { text, ruby } => {
54                let text_start = self.packed_text.len();
55                let ruby_start = self.packed_ruby.len();
56                self.packed_text.push_str(text);
57                self.packed_ruby.push_str(ruby);
58                self.placements.push(Placement {
59                    text_start,
60                    text_end: text_start + text.len(),
61                    ruby_start,
62                    ruby_end: ruby_start + ruby.len(),
63                });
64            }
65        }
66    }
67
68    ///Returns the plain text stored in this `RubyString`. The result has no ruby glosses attached
69    ///to it anymore.
70    ///
71    ///```
72    ///# use ruby_string::{RubyString, Segment};
73    ///let mut rs = RubyString::new();
74    ///rs.push_str("ここは");
75    ///rs.push_segment(Segment::Rubied { text: "東", ruby: "とう" });
76    ///rs.push_segment(Segment::Rubied { text: "京", ruby: "きょう" });
77    ///rs.push_str("です");
78    ///assert_eq!(rs.to_plain_text(), "ここは東京です");
79    ///```
80    pub fn to_plain_text(&self) -> String {
81        self.segments().map(|s| s.plain_text()).collect()
82    }
83
84    ///Returns an encoding of this `RubyString` as a plain String using interlinear annotation
85    ///characters.
86    ///
87    ///```
88    ///# use ruby_string::{RubyString, Segment};
89    ///let mut rs = RubyString::new();
90    ///rs.push_str("ここは");
91    ///rs.push_segment(Segment::Rubied { text: "東", ruby: "とう" });
92    ///rs.push_segment(Segment::Rubied { text: "京", ruby: "きょう" });
93    ///rs.push_str("です");
94    ///let encoded = rs.to_interlinear_encoding();
95    ///assert_eq!(encoded, "ここは\u{FFF9}東\u{FFFA}とう\u{FFFB}\u{FFF9}京\u{FFFA}きょう\u{FFFB}です");
96    ///```
97    pub fn to_interlinear_encoding(&self) -> String {
98        self.segments()
99            .map(|s| s.to_interlinear_encoding())
100            .collect()
101    }
102
103    ///An iterator over the segments in this `RubyString`.
104    pub fn segments(&self) -> SegmentIterator<'_> {
105        SegmentIterator {
106            string: self,
107            next_text_start: 0,
108            next_placement_idx: 0,
109        }
110    }
111}
112
113impl Default for RubyString {
114    fn default() -> Self {
115        Self::new()
116    }
117}
118
119impl<T: Into<String>> From<T> for RubyString {
120    fn from(val: T) -> RubyString {
121        RubyString {
122            packed_text: val.into(),
123            packed_ruby: String::new(),
124            placements: Vec::new(),
125        }
126    }
127}
128
129impl<'a> FromIterator<Segment<'a>> for RubyString {
130    fn from_iter<I: IntoIterator<Item = Segment<'a>>>(iter: I) -> RubyString {
131        let mut s = RubyString::new();
132        s.extend(iter);
133        s
134    }
135}
136
137impl<'a> Extend<Segment<'a>> for RubyString {
138    fn extend<I: IntoIterator<Item = Segment<'a>>>(&mut self, iter: I) {
139        iter.into_iter().for_each(move |s| self.push_segment(s));
140    }
141}