Skip to main content

link_cli/
unicode_string_storage.rs

1//! Unicode string and name storage backed by doublet links.
2//!
3//! This mirrors the C# `UnicodeStringStorage<uint>` constructor pipeline:
4//! pinned types, `BalancedVariantConverter`, target matchers, Unicode symbol
5//! converters, string/sequence converters, right-sequence walking, and
6//! `NamedLinks`.
7
8use std::cell::RefCell;
9
10use anyhow::{bail, Result};
11
12use crate::hybrid_reference::{external_reference, external_reference_value};
13use crate::link_storage::LinkStorage;
14use crate::named_links::NamedLinks;
15use crate::pinned_types::PinnedTypes;
16use crate::sequences::{
17    AddressToRawNumberConverter, BalancedVariantConverter, CachingConverterDecorator,
18    CharToUnicodeSymbolConverter, RawNumberToAddressConverter, RightSequenceWalker,
19    StringToUnicodeSequenceConverter, TargetMatcher, UnicodeSequenceToStringConverter,
20    UnicodeSymbolToCharConverter,
21};
22
23/// Link-backed Unicode string storage with C# pinned type layout.
24pub struct UnicodeStringStorage<'a> {
25    links: &'a mut LinkStorage,
26    type_type: u32,
27    unicode_symbol_type: u32,
28    unicode_sequence_type: u32,
29    string_type: u32,
30    empty_string_type: u32,
31    name_type: u32,
32    address_to_number_converter: AddressToRawNumberConverter,
33    number_to_address_converter: RawNumberToAddressConverter,
34    balanced_variant_converter: BalancedVariantConverter,
35    unicode_symbol_criterion_matcher: TargetMatcher,
36    unicode_sequence_criterion_matcher: TargetMatcher,
37    char_to_unicode_symbol_converter: CharToUnicodeSymbolConverter,
38    unicode_symbol_to_char_converter: UnicodeSymbolToCharConverter,
39    string_to_unicode_sequence_converter: StringToUnicodeSequenceConverter,
40    sequence_walker: RightSequenceWalker,
41    unicode_sequence_to_string_converter: UnicodeSequenceToStringConverter,
42    string_to_unicode_sequence_cache: CachingConverterDecorator<String, u32>,
43    unicode_sequence_to_string_cache: RefCell<CachingConverterDecorator<u32, String>>,
44}
45
46impl<'a> UnicodeStringStorage<'a> {
47    pub fn new(links: &'a mut LinkStorage) -> Result<Self> {
48        let (
49            type_type,
50            unicode_symbol_type,
51            unicode_sequence_type,
52            string_type,
53            empty_string_type,
54            name_type,
55        ) = {
56            let mut pinned_types = PinnedTypes::new(links);
57            (
58                pinned_types.next_type()?,
59                pinned_types.next_type()?,
60                pinned_types.next_type()?,
61                pinned_types.next_type()?,
62                pinned_types.next_type()?,
63                pinned_types.next_type()?,
64            )
65        };
66
67        let address_to_number_converter = AddressToRawNumberConverter::new();
68        let number_to_address_converter = RawNumberToAddressConverter::new();
69        let balanced_variant_converter = BalancedVariantConverter::new();
70        let unicode_symbol_criterion_matcher = TargetMatcher::new(unicode_symbol_type);
71        let unicode_sequence_criterion_matcher = TargetMatcher::new(unicode_sequence_type);
72        let char_to_unicode_symbol_converter =
73            CharToUnicodeSymbolConverter::new(address_to_number_converter, unicode_symbol_type);
74        let unicode_symbol_to_char_converter = UnicodeSymbolToCharConverter::new(
75            number_to_address_converter,
76            unicode_symbol_criterion_matcher,
77        );
78        let string_to_unicode_sequence_converter = StringToUnicodeSequenceConverter::new(
79            char_to_unicode_symbol_converter,
80            balanced_variant_converter,
81            unicode_sequence_type,
82        );
83        let sequence_walker = RightSequenceWalker::new(unicode_symbol_criterion_matcher);
84        let unicode_sequence_to_string_converter = UnicodeSequenceToStringConverter::new(
85            unicode_sequence_criterion_matcher,
86            sequence_walker,
87            unicode_symbol_to_char_converter,
88            unicode_sequence_type,
89        );
90
91        let mut storage = Self {
92            links,
93            type_type,
94            unicode_symbol_type,
95            unicode_sequence_type,
96            string_type,
97            empty_string_type,
98            name_type,
99            address_to_number_converter,
100            number_to_address_converter,
101            balanced_variant_converter,
102            unicode_symbol_criterion_matcher,
103            unicode_sequence_criterion_matcher,
104            char_to_unicode_symbol_converter,
105            unicode_symbol_to_char_converter,
106            string_to_unicode_sequence_converter,
107            sequence_walker,
108            unicode_sequence_to_string_converter,
109            string_to_unicode_sequence_cache: CachingConverterDecorator::new(),
110            unicode_sequence_to_string_cache: RefCell::new(CachingConverterDecorator::new()),
111        };
112
113        storage.set_name(type_type, "Type")?;
114        storage.set_name(unicode_symbol_type, "UnicodeSymbol")?;
115        storage.set_name(unicode_sequence_type, "UnicodeSequence")?;
116        storage.set_name(string_type, "String")?;
117        storage.set_name(empty_string_type, "EmptyString")?;
118        storage.set_name(name_type, "Name")?;
119
120        Ok(storage)
121    }
122
123    pub fn links_mut(&mut self) -> &mut LinkStorage {
124        self.links
125    }
126
127    pub fn into_named_links(self) -> NamedLinks<'a> {
128        NamedLinks::from_storage(self)
129    }
130
131    pub fn type_type(&self) -> u32 {
132        self.type_type
133    }
134
135    pub fn unicode_symbol_type(&self) -> u32 {
136        self.unicode_symbol_type
137    }
138
139    pub fn unicode_sequence_type(&self) -> u32 {
140        self.unicode_sequence_type
141    }
142
143    pub fn string_type(&self) -> u32 {
144        self.string_type
145    }
146
147    pub fn empty_string_type(&self) -> u32 {
148        self.empty_string_type
149    }
150
151    pub fn name_type(&self) -> u32 {
152        self.name_type
153    }
154
155    pub fn address_to_number_converter(&self) -> AddressToRawNumberConverter {
156        self.address_to_number_converter
157    }
158
159    pub fn number_to_address_converter(&self) -> RawNumberToAddressConverter {
160        self.number_to_address_converter
161    }
162
163    pub fn balanced_variant_converter(&self) -> BalancedVariantConverter {
164        self.balanced_variant_converter
165    }
166
167    pub fn unicode_symbol_criterion_matcher(&self) -> TargetMatcher {
168        self.unicode_symbol_criterion_matcher
169    }
170
171    pub fn unicode_sequence_criterion_matcher(&self) -> TargetMatcher {
172        self.unicode_sequence_criterion_matcher
173    }
174
175    pub fn char_to_unicode_symbol_converter(&self) -> CharToUnicodeSymbolConverter {
176        self.char_to_unicode_symbol_converter
177    }
178
179    pub fn unicode_symbol_to_char_converter(&self) -> UnicodeSymbolToCharConverter {
180        self.unicode_symbol_to_char_converter
181    }
182
183    pub fn string_to_unicode_sequence_converter(&self) -> StringToUnicodeSequenceConverter {
184        self.string_to_unicode_sequence_converter
185    }
186
187    pub fn sequence_walker(&self) -> RightSequenceWalker {
188        self.sequence_walker
189    }
190
191    pub fn unicode_sequence_to_string_converter(&self) -> UnicodeSequenceToStringConverter {
192        self.unicode_sequence_to_string_converter
193    }
194
195    pub fn create_string(&mut self, content: &str) -> Result<u32> {
196        let string_sequence = self.get_string_sequence(content);
197        Ok(self.links.get_or_create(self.string_type, string_sequence))
198    }
199
200    pub fn get_string(&self, string_value: u32) -> Result<String> {
201        let mut current = string_value;
202        for _ in 0..3 {
203            let Some(link) = self.links.get(current) else {
204                break;
205            };
206            if link.source == self.string_type {
207                return if link.target == self.empty_string_type {
208                    Ok(String::new())
209                } else {
210                    self.unicode_sequence_to_string(link.target)
211                };
212            }
213            current = link.target;
214        }
215        bail!("The passed link does not contain a string.")
216    }
217
218    pub fn unicode_sequence_code_units(&self, string_value: u32) -> Result<Vec<u16>> {
219        let sequence = self.unwrap_string_sequence(string_value)?;
220        if sequence == self.empty_string_type {
221            return Ok(Vec::new());
222        }
223        if !self
224            .unicode_sequence_criterion_matcher
225            .is_matched(self.links, sequence)
226        {
227            bail!("Link {sequence} is not a Unicode sequence.");
228        }
229        let unicode_sequence = self
230            .links
231            .get(sequence)
232            .ok_or_else(|| anyhow::anyhow!("Unicode sequence link {sequence} does not exist."))?;
233
234        self.sequence_walker
235            .walk(self.links, unicode_sequence.source)
236            .into_iter()
237            .map(|symbol| {
238                self.unicode_symbol_to_char_converter
239                    .convert(self.links, symbol)
240            })
241            .collect()
242    }
243
244    pub fn get_types(&self) -> Vec<u32> {
245        self.links
246            .query(None, Some(self.type_type), None)
247            .into_iter()
248            .map(|link| link.index)
249            .collect()
250    }
251
252    pub fn is_type(&self, address: u32) -> bool {
253        self.links
254            .get(address)
255            .is_some_and(|link| link.source == self.type_type)
256    }
257
258    pub fn get_or_create_type(&mut self, name: &str) -> Result<u32> {
259        if let Some(existing) = self.get_by_name(name)? {
260            return Ok(existing);
261        }
262
263        let type_link = self.links.create(0, 0);
264        self.links.update(type_link, self.type_type, type_link)?;
265        self.set_name(type_link, name)?;
266        Ok(type_link)
267    }
268
269    pub fn set_name_for_external_reference(&mut self, link: u32, name: &str) -> Result<u32> {
270        self.set_name(external_reference(link), name)
271    }
272
273    pub fn get_name_by_external_reference(&self, link: u32) -> Result<Option<String>> {
274        self.get_name(external_reference(link))
275    }
276
277    pub fn get_external_reference_by_name(&mut self, name: &str) -> Result<Option<u32>> {
278        Ok(self.get_by_name(name)?.and_then(external_reference_value))
279    }
280
281    pub fn remove_name_by_external_reference(&mut self, external_reference_id: u32) -> Result<()> {
282        self.remove_name(external_reference(external_reference_id))
283    }
284
285    pub fn set_name(&mut self, link: u32, name: &str) -> Result<u32> {
286        let name_sequence = self.create_string(name)?;
287        let name_link = self.links.get_or_create(self.name_type, name_sequence);
288        Ok(self.links.get_or_create(link, name_link))
289    }
290
291    pub fn get_name(&self, link: u32) -> Result<Option<String>> {
292        for name_pair in self.links.query(None, Some(link), None) {
293            let name_candidate = name_pair.target;
294            let Some(candidate) = self.links.get(name_candidate) else {
295                continue;
296            };
297            if candidate.source == self.name_type {
298                return self.get_string(candidate.target).map(Some);
299            }
300        }
301        Ok(None)
302    }
303
304    pub fn get_by_name(&mut self, name: &str) -> Result<Option<u32>> {
305        let name_sequence = self.create_string(name)?;
306        let Some(name_link) = self.links.search(self.name_type, name_sequence) else {
307            return Ok(None);
308        };
309        Ok(self
310            .links
311            .query(None, None, Some(name_link))
312            .into_iter()
313            .map(|link| link.source)
314            .next())
315    }
316
317    pub fn remove_name(&mut self, link: u32) -> Result<()> {
318        let name_pairs = self
319            .links
320            .query(None, Some(link), None)
321            .into_iter()
322            .map(|link| (link.index, link.target))
323            .collect::<Vec<_>>();
324
325        for (name_pair, name_candidate) in name_pairs {
326            let Some(candidate) = self.links.get(name_candidate).copied() else {
327                continue;
328            };
329            if candidate.source != self.name_type {
330                continue;
331            }
332
333            if self.links.exists(name_pair) {
334                self.links.delete(name_pair)?;
335            }
336
337            let still_used = self
338                .links
339                .query(None, None, Some(name_candidate))
340                .into_iter()
341                .any(|usage| usage.index != name_pair);
342            if !still_used && self.links.exists(name_candidate) {
343                self.links.delete(name_candidate)?;
344            }
345        }
346
347        Ok(())
348    }
349
350    fn get_string_sequence(&mut self, content: &str) -> u32 {
351        if content.is_empty() {
352            self.empty_string_type
353        } else {
354            self.string_to_unicode_sequence(content)
355        }
356    }
357
358    fn string_to_unicode_sequence(&mut self, content: &str) -> u32 {
359        let input = content.to_string();
360        if let Some(cached) = self.string_to_unicode_sequence_cache.get(&input) {
361            return cached;
362        }
363
364        let converter = self.string_to_unicode_sequence_converter;
365        let sequence = converter.convert(self.links, content);
366        self.string_to_unicode_sequence_cache
367            .insert(input, sequence)
368    }
369
370    fn unicode_sequence_to_string(&self, sequence: u32) -> Result<String> {
371        if let Some(cached) = self
372            .unicode_sequence_to_string_cache
373            .borrow()
374            .get(&sequence)
375        {
376            return Ok(cached);
377        }
378
379        let output = self
380            .unicode_sequence_to_string_converter
381            .convert(self.links, sequence)?;
382        self.unicode_sequence_to_string_cache
383            .borrow_mut()
384            .insert(sequence, output.clone());
385        Ok(output)
386    }
387
388    fn unwrap_string_sequence(&self, string_value: u32) -> Result<u32> {
389        let mut current = string_value;
390        for _ in 0..3 {
391            let Some(link) = self.links.get(current) else {
392                break;
393            };
394            if link.source == self.string_type {
395                return Ok(link.target);
396            }
397            current = link.target;
398        }
399        bail!("The passed link does not contain a string.")
400    }
401}