ruby_string/string.rs
1use crate::*;
2use std::iter::FromIterator;
3
///Records where a single ruby gloss sits: which slice of the main text it annotates, and which
///slice of the concatenated ruby text holds the gloss itself.
///
///NOTE: every field is a byte offset, not a character count.
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
pub(crate) struct Placement {
    ///Byte offset in the main text at which the annotated text begins.
    pub(crate) text_start: usize,
    ///Byte offset in the main text just past the end of the annotated text.
    pub(crate) text_end: usize,
    ///Byte offset in the concatenated ruby text at which this gloss begins.
    pub(crate) ruby_start: usize,
    ///Byte offset in the concatenated ruby text just past the end of this gloss.
    //TODO: this field is redundant; it could be dropped in favour of the `ruby_start` of the
    //next Placement in order
    pub(crate) ruby_end: usize,
}
14
15///A string type that can have [ruby glosses](https://en.wikipedia.org/wiki/Ruby_character)
16///attached to parts of it.
17///
18///## Memory layout
19///
20///The text content is stored in two String instances, one being the main text and one being a
21///concatenation of all the rubies. Placement of the rubies is stored as a list of indices into
22///both strings. Compared to the trivial structure (where each rubied substring is held as a
23///separate String), this layout reduces memory usage and the number of separate allocations at the
24///expense of slightly more complicated indexing logic.
25#[derive(Clone, PartialEq, Eq)]
26pub struct RubyString {
27 pub(crate) packed_text: String,
28 pub(crate) packed_ruby: String,
29 pub(crate) placements: Vec<Placement>,
30}
31
32impl RubyString {
33 ///Creates a new empty `RubyString`.
34 pub fn new() -> RubyString {
35 RubyString {
36 packed_text: String::new(),
37 packed_ruby: String::new(),
38 placements: Vec::new(),
39 }
40 }
41
42 ///Appends plain text (that does not have a ruby gloss attached to it) to this `RubyString`.
43 pub fn push_str(&mut self, string: &str) {
44 self.packed_text.push_str(string);
45 }
46
47 ///Appends text to this `RubyString` and attaches a ruby gloss to it.
48 pub fn push_segment<'a>(&mut self, segment: Segment<'a>) {
49 match segment {
50 Segment::Plain { text } => {
51 self.packed_text.push_str(text);
52 }
53 Segment::Rubied { text, ruby } => {
54 let text_start = self.packed_text.len();
55 let ruby_start = self.packed_ruby.len();
56 self.packed_text.push_str(text);
57 self.packed_ruby.push_str(ruby);
58 self.placements.push(Placement {
59 text_start,
60 text_end: text_start + text.len(),
61 ruby_start,
62 ruby_end: ruby_start + ruby.len(),
63 });
64 }
65 }
66 }
67
68 ///Returns the plain text stored in this `RubyString`. The result has no ruby glosses attached
69 ///to it anymore.
70 ///
71 ///```
72 ///# use ruby_string::{RubyString, Segment};
73 ///let mut rs = RubyString::new();
74 ///rs.push_str("ここは");
75 ///rs.push_segment(Segment::Rubied { text: "東", ruby: "とう" });
76 ///rs.push_segment(Segment::Rubied { text: "京", ruby: "きょう" });
77 ///rs.push_str("です");
78 ///assert_eq!(rs.to_plain_text(), "ここは東京です");
79 ///```
80 pub fn to_plain_text(&self) -> String {
81 self.segments().map(|s| s.plain_text()).collect()
82 }
83
84 ///Returns an encoding of this `RubyString` as a plain String using interlinear annotation
85 ///characters.
86 ///
87 ///```
88 ///# use ruby_string::{RubyString, Segment};
89 ///let mut rs = RubyString::new();
90 ///rs.push_str("ここは");
91 ///rs.push_segment(Segment::Rubied { text: "東", ruby: "とう" });
92 ///rs.push_segment(Segment::Rubied { text: "京", ruby: "きょう" });
93 ///rs.push_str("です");
94 ///let encoded = rs.to_interlinear_encoding();
95 ///assert_eq!(encoded, "ここは\u{FFF9}東\u{FFFA}とう\u{FFFB}\u{FFF9}京\u{FFFA}きょう\u{FFFB}です");
96 ///```
97 pub fn to_interlinear_encoding(&self) -> String {
98 self.segments()
99 .map(|s| s.to_interlinear_encoding())
100 .collect()
101 }
102
103 ///An iterator over the segments in this `RubyString`.
104 pub fn segments(&self) -> SegmentIterator<'_> {
105 SegmentIterator {
106 string: self,
107 next_text_start: 0,
108 next_placement_idx: 0,
109 }
110 }
111}
112
113impl Default for RubyString {
114 fn default() -> Self {
115 Self::new()
116 }
117}
118
119impl<T: Into<String>> From<T> for RubyString {
120 fn from(val: T) -> RubyString {
121 RubyString {
122 packed_text: val.into(),
123 packed_ruby: String::new(),
124 placements: Vec::new(),
125 }
126 }
127}
128
129impl<'a> FromIterator<Segment<'a>> for RubyString {
130 fn from_iter<I: IntoIterator<Item = Segment<'a>>>(iter: I) -> RubyString {
131 let mut s = RubyString::new();
132 s.extend(iter);
133 s
134 }
135}
136
137impl<'a> Extend<Segment<'a>> for RubyString {
138 fn extend<I: IntoIterator<Item = Segment<'a>>>(&mut self, iter: I) {
139 iter.into_iter().for_each(move |s| self.push_segment(s));
140 }
141}