1use std::cell::RefCell;
9
10use anyhow::{bail, Result};
11
12use crate::hybrid_reference::{external_reference, external_reference_value};
13use crate::link_storage::LinkStorage;
14use crate::named_links::NamedLinks;
15use crate::pinned_types::PinnedTypes;
16use crate::sequences::{
17 AddressToRawNumberConverter, BalancedVariantConverter, CachingConverterDecorator,
18 CharToUnicodeSymbolConverter, RawNumberToAddressConverter, RightSequenceWalker,
19 StringToUnicodeSequenceConverter, TargetMatcher, UnicodeSequenceToStringConverter,
20 UnicodeSymbolToCharConverter,
21};
22
23pub struct UnicodeStringStorage<'a> {
25 links: &'a mut LinkStorage,
26 type_type: u32,
27 unicode_symbol_type: u32,
28 unicode_sequence_type: u32,
29 string_type: u32,
30 empty_string_type: u32,
31 name_type: u32,
32 address_to_number_converter: AddressToRawNumberConverter,
33 number_to_address_converter: RawNumberToAddressConverter,
34 balanced_variant_converter: BalancedVariantConverter,
35 unicode_symbol_criterion_matcher: TargetMatcher,
36 unicode_sequence_criterion_matcher: TargetMatcher,
37 char_to_unicode_symbol_converter: CharToUnicodeSymbolConverter,
38 unicode_symbol_to_char_converter: UnicodeSymbolToCharConverter,
39 string_to_unicode_sequence_converter: StringToUnicodeSequenceConverter,
40 sequence_walker: RightSequenceWalker,
41 unicode_sequence_to_string_converter: UnicodeSequenceToStringConverter,
42 string_to_unicode_sequence_cache: CachingConverterDecorator<String, u32>,
43 unicode_sequence_to_string_cache: RefCell<CachingConverterDecorator<u32, String>>,
44}
45
46impl<'a> UnicodeStringStorage<'a> {
47 pub fn new(links: &'a mut LinkStorage) -> Result<Self> {
48 let (
49 type_type,
50 unicode_symbol_type,
51 unicode_sequence_type,
52 string_type,
53 empty_string_type,
54 name_type,
55 ) = {
56 let mut pinned_types = PinnedTypes::new(links);
57 (
58 pinned_types.next_type()?,
59 pinned_types.next_type()?,
60 pinned_types.next_type()?,
61 pinned_types.next_type()?,
62 pinned_types.next_type()?,
63 pinned_types.next_type()?,
64 )
65 };
66
67 let address_to_number_converter = AddressToRawNumberConverter::new();
68 let number_to_address_converter = RawNumberToAddressConverter::new();
69 let balanced_variant_converter = BalancedVariantConverter::new();
70 let unicode_symbol_criterion_matcher = TargetMatcher::new(unicode_symbol_type);
71 let unicode_sequence_criterion_matcher = TargetMatcher::new(unicode_sequence_type);
72 let char_to_unicode_symbol_converter =
73 CharToUnicodeSymbolConverter::new(address_to_number_converter, unicode_symbol_type);
74 let unicode_symbol_to_char_converter = UnicodeSymbolToCharConverter::new(
75 number_to_address_converter,
76 unicode_symbol_criterion_matcher,
77 );
78 let string_to_unicode_sequence_converter = StringToUnicodeSequenceConverter::new(
79 char_to_unicode_symbol_converter,
80 balanced_variant_converter,
81 unicode_sequence_type,
82 );
83 let sequence_walker = RightSequenceWalker::new(unicode_symbol_criterion_matcher);
84 let unicode_sequence_to_string_converter = UnicodeSequenceToStringConverter::new(
85 unicode_sequence_criterion_matcher,
86 sequence_walker,
87 unicode_symbol_to_char_converter,
88 unicode_sequence_type,
89 );
90
91 let mut storage = Self {
92 links,
93 type_type,
94 unicode_symbol_type,
95 unicode_sequence_type,
96 string_type,
97 empty_string_type,
98 name_type,
99 address_to_number_converter,
100 number_to_address_converter,
101 balanced_variant_converter,
102 unicode_symbol_criterion_matcher,
103 unicode_sequence_criterion_matcher,
104 char_to_unicode_symbol_converter,
105 unicode_symbol_to_char_converter,
106 string_to_unicode_sequence_converter,
107 sequence_walker,
108 unicode_sequence_to_string_converter,
109 string_to_unicode_sequence_cache: CachingConverterDecorator::new(),
110 unicode_sequence_to_string_cache: RefCell::new(CachingConverterDecorator::new()),
111 };
112
113 storage.set_name(type_type, "Type")?;
114 storage.set_name(unicode_symbol_type, "UnicodeSymbol")?;
115 storage.set_name(unicode_sequence_type, "UnicodeSequence")?;
116 storage.set_name(string_type, "String")?;
117 storage.set_name(empty_string_type, "EmptyString")?;
118 storage.set_name(name_type, "Name")?;
119
120 Ok(storage)
121 }
122
123 pub fn links_mut(&mut self) -> &mut LinkStorage {
124 self.links
125 }
126
127 pub fn into_named_links(self) -> NamedLinks<'a> {
128 NamedLinks::from_storage(self)
129 }
130
131 pub fn type_type(&self) -> u32 {
132 self.type_type
133 }
134
135 pub fn unicode_symbol_type(&self) -> u32 {
136 self.unicode_symbol_type
137 }
138
139 pub fn unicode_sequence_type(&self) -> u32 {
140 self.unicode_sequence_type
141 }
142
143 pub fn string_type(&self) -> u32 {
144 self.string_type
145 }
146
147 pub fn empty_string_type(&self) -> u32 {
148 self.empty_string_type
149 }
150
151 pub fn name_type(&self) -> u32 {
152 self.name_type
153 }
154
155 pub fn address_to_number_converter(&self) -> AddressToRawNumberConverter {
156 self.address_to_number_converter
157 }
158
159 pub fn number_to_address_converter(&self) -> RawNumberToAddressConverter {
160 self.number_to_address_converter
161 }
162
163 pub fn balanced_variant_converter(&self) -> BalancedVariantConverter {
164 self.balanced_variant_converter
165 }
166
167 pub fn unicode_symbol_criterion_matcher(&self) -> TargetMatcher {
168 self.unicode_symbol_criterion_matcher
169 }
170
171 pub fn unicode_sequence_criterion_matcher(&self) -> TargetMatcher {
172 self.unicode_sequence_criterion_matcher
173 }
174
175 pub fn char_to_unicode_symbol_converter(&self) -> CharToUnicodeSymbolConverter {
176 self.char_to_unicode_symbol_converter
177 }
178
179 pub fn unicode_symbol_to_char_converter(&self) -> UnicodeSymbolToCharConverter {
180 self.unicode_symbol_to_char_converter
181 }
182
183 pub fn string_to_unicode_sequence_converter(&self) -> StringToUnicodeSequenceConverter {
184 self.string_to_unicode_sequence_converter
185 }
186
187 pub fn sequence_walker(&self) -> RightSequenceWalker {
188 self.sequence_walker
189 }
190
191 pub fn unicode_sequence_to_string_converter(&self) -> UnicodeSequenceToStringConverter {
192 self.unicode_sequence_to_string_converter
193 }
194
195 pub fn create_string(&mut self, content: &str) -> Result<u32> {
196 let string_sequence = self.get_string_sequence(content);
197 Ok(self.links.get_or_create(self.string_type, string_sequence))
198 }
199
200 pub fn get_string(&self, string_value: u32) -> Result<String> {
201 let mut current = string_value;
202 for _ in 0..3 {
203 let Some(link) = self.links.get(current) else {
204 break;
205 };
206 if link.source == self.string_type {
207 return if link.target == self.empty_string_type {
208 Ok(String::new())
209 } else {
210 self.unicode_sequence_to_string(link.target)
211 };
212 }
213 current = link.target;
214 }
215 bail!("The passed link does not contain a string.")
216 }
217
218 pub fn unicode_sequence_code_units(&self, string_value: u32) -> Result<Vec<u16>> {
219 let sequence = self.unwrap_string_sequence(string_value)?;
220 if sequence == self.empty_string_type {
221 return Ok(Vec::new());
222 }
223 if !self
224 .unicode_sequence_criterion_matcher
225 .is_matched(self.links, sequence)
226 {
227 bail!("Link {sequence} is not a Unicode sequence.");
228 }
229 let unicode_sequence = self
230 .links
231 .get(sequence)
232 .ok_or_else(|| anyhow::anyhow!("Unicode sequence link {sequence} does not exist."))?;
233
234 self.sequence_walker
235 .walk(self.links, unicode_sequence.source)
236 .into_iter()
237 .map(|symbol| {
238 self.unicode_symbol_to_char_converter
239 .convert(self.links, symbol)
240 })
241 .collect()
242 }
243
244 pub fn get_types(&self) -> Vec<u32> {
245 self.links
246 .query(None, Some(self.type_type), None)
247 .into_iter()
248 .map(|link| link.index)
249 .collect()
250 }
251
252 pub fn is_type(&self, address: u32) -> bool {
253 self.links
254 .get(address)
255 .is_some_and(|link| link.source == self.type_type)
256 }
257
258 pub fn get_or_create_type(&mut self, name: &str) -> Result<u32> {
259 if let Some(existing) = self.get_by_name(name)? {
260 return Ok(existing);
261 }
262
263 let type_link = self.links.create(0, 0);
264 self.links.update(type_link, self.type_type, type_link)?;
265 self.set_name(type_link, name)?;
266 Ok(type_link)
267 }
268
269 pub fn set_name_for_external_reference(&mut self, link: u32, name: &str) -> Result<u32> {
270 self.set_name(external_reference(link), name)
271 }
272
273 pub fn get_name_by_external_reference(&self, link: u32) -> Result<Option<String>> {
274 self.get_name(external_reference(link))
275 }
276
277 pub fn get_external_reference_by_name(&mut self, name: &str) -> Result<Option<u32>> {
278 Ok(self.get_by_name(name)?.and_then(external_reference_value))
279 }
280
281 pub fn remove_name_by_external_reference(&mut self, external_reference_id: u32) -> Result<()> {
282 self.remove_name(external_reference(external_reference_id))
283 }
284
285 pub fn set_name(&mut self, link: u32, name: &str) -> Result<u32> {
286 let name_sequence = self.create_string(name)?;
287 let name_link = self.links.get_or_create(self.name_type, name_sequence);
288 Ok(self.links.get_or_create(link, name_link))
289 }
290
291 pub fn get_name(&self, link: u32) -> Result<Option<String>> {
292 for name_pair in self.links.query(None, Some(link), None) {
293 let name_candidate = name_pair.target;
294 let Some(candidate) = self.links.get(name_candidate) else {
295 continue;
296 };
297 if candidate.source == self.name_type {
298 return self.get_string(candidate.target).map(Some);
299 }
300 }
301 Ok(None)
302 }
303
304 pub fn get_by_name(&mut self, name: &str) -> Result<Option<u32>> {
305 let name_sequence = self.create_string(name)?;
306 let Some(name_link) = self.links.search(self.name_type, name_sequence) else {
307 return Ok(None);
308 };
309 Ok(self
310 .links
311 .query(None, None, Some(name_link))
312 .into_iter()
313 .map(|link| link.source)
314 .next())
315 }
316
317 pub fn remove_name(&mut self, link: u32) -> Result<()> {
318 let name_pairs = self
319 .links
320 .query(None, Some(link), None)
321 .into_iter()
322 .map(|link| (link.index, link.target))
323 .collect::<Vec<_>>();
324
325 for (name_pair, name_candidate) in name_pairs {
326 let Some(candidate) = self.links.get(name_candidate).copied() else {
327 continue;
328 };
329 if candidate.source != self.name_type {
330 continue;
331 }
332
333 if self.links.exists(name_pair) {
334 self.links.delete(name_pair)?;
335 }
336
337 let still_used = self
338 .links
339 .query(None, None, Some(name_candidate))
340 .into_iter()
341 .any(|usage| usage.index != name_pair);
342 if !still_used && self.links.exists(name_candidate) {
343 self.links.delete(name_candidate)?;
344 }
345 }
346
347 Ok(())
348 }
349
350 fn get_string_sequence(&mut self, content: &str) -> u32 {
351 if content.is_empty() {
352 self.empty_string_type
353 } else {
354 self.string_to_unicode_sequence(content)
355 }
356 }
357
358 fn string_to_unicode_sequence(&mut self, content: &str) -> u32 {
359 let input = content.to_string();
360 if let Some(cached) = self.string_to_unicode_sequence_cache.get(&input) {
361 return cached;
362 }
363
364 let converter = self.string_to_unicode_sequence_converter;
365 let sequence = converter.convert(self.links, content);
366 self.string_to_unicode_sequence_cache
367 .insert(input, sequence)
368 }
369
370 fn unicode_sequence_to_string(&self, sequence: u32) -> Result<String> {
371 if let Some(cached) = self
372 .unicode_sequence_to_string_cache
373 .borrow()
374 .get(&sequence)
375 {
376 return Ok(cached);
377 }
378
379 let output = self
380 .unicode_sequence_to_string_converter
381 .convert(self.links, sequence)?;
382 self.unicode_sequence_to_string_cache
383 .borrow_mut()
384 .insert(sequence, output.clone());
385 Ok(output)
386 }
387
388 fn unwrap_string_sequence(&self, string_value: u32) -> Result<u32> {
389 let mut current = string_value;
390 for _ in 0..3 {
391 let Some(link) = self.links.get(current) else {
392 break;
393 };
394 if link.source == self.string_type {
395 return Ok(link.target);
396 }
397 current = link.target;
398 }
399 bail!("The passed link does not contain a string.")
400 }
401}