ion_rs/symbol_table.rs
1use std::collections::HashMap;
2use std::sync::Arc;
3
4use crate::constants::v1_0;
5use crate::types::{Symbol, SymbolId};
6
7/// Stores mappings from Symbol IDs to text and vice-versa.
8// SymbolTable instances always have at least system symbols; they are never empty.
9#[allow(clippy::len_without_is_empty)]
10pub struct SymbolTable {
11 symbols_by_id: Vec<Symbol>,
12 ids_by_text: HashMap<Symbol, SymbolId>,
13}
14
15impl Default for SymbolTable {
16 fn default() -> Self {
17 Self::new()
18 }
19}
20
21impl SymbolTable {
22 /// Constructs a new symbol table pre-populated with the system symbols defined in the spec.
23 pub fn new() -> SymbolTable {
24 let mut symbol_table = SymbolTable {
25 symbols_by_id: Vec::with_capacity(v1_0::SYSTEM_SYMBOLS.len()),
26 ids_by_text: HashMap::new(),
27 };
28 symbol_table.initialize();
29 symbol_table
30 }
31
32 // Interns the v1.0 system symbols
33 fn initialize(&mut self) {
34 for &text in v1_0::SYSTEM_SYMBOLS.iter() {
35 self.intern_or_add_placeholder(text);
36 }
37 }
38
39 pub fn reset(&mut self) {
40 self.symbols_by_id.clear();
41 self.ids_by_text.clear();
42 self.initialize();
43 }
44
45 /// If `text` is already in the symbol table, returns the corresponding [SymbolId].
46 /// Otherwise, adds `text` to the symbol table and returns the newly assigned [SymbolId].
47 pub fn intern<A: AsRef<str>>(&mut self, text: A) -> SymbolId {
48 let text = text.as_ref();
49 // If the text is already in the symbol table, return the ID associated with it.
50 if let Some(id) = self.ids_by_text.get(text) {
51 return *id;
52 }
53
54 // Otherwise, intern it and return the new ID.
55 let id = self.symbols_by_id.len();
56 let arc: Arc<str> = Arc::from(text);
57 let symbol = Symbol::shared(arc);
58 self.symbols_by_id.push(symbol.clone());
59 self.ids_by_text.insert(symbol, id);
60 id
61 }
62
63 /// Assigns unknown text to the next available symbol ID. This is used when an Ion reader
64 /// encounters null or non-string values in a stream's symbol table.
65 pub fn add_placeholder(&mut self) -> SymbolId {
66 let sid = self.symbols_by_id.len();
67 self.symbols_by_id.push(Symbol::unknown_text());
68 sid
69 }
70
71 /// If `maybe_text` is `Some(text)`, this method is equivalent to `intern(text)`.
72 /// If `maybe_text` is `None`, this method is equivalent to `add_placeholder()`.
73 pub fn intern_or_add_placeholder<A: AsRef<str>>(&mut self, maybe_text: Option<A>) -> SymbolId {
74 match maybe_text {
75 Some(text) => self.intern(text),
76 None => self.add_placeholder(),
77 }
78 }
79
80 /// If defined, returns the Symbol ID associated with the provided text.
81 pub fn sid_for<A: AsRef<str>>(&self, text: &A) -> Option<SymbolId> {
82 self.ids_by_text.get(text.as_ref()).copied()
83 }
84
85 /// If defined, returns the text associated with the provided Symbol ID.
86 pub fn text_for(&self, sid: SymbolId) -> Option<&str> {
87 self.symbols_by_id
88 // If the SID is out of bounds, returns None
89 .get(sid)?
90 // If the text is unknown, returns None
91 .text()
92 }
93
94 /// If defined, returns the Symbol associated with the provided Symbol ID.
95 pub fn symbol_for(&self, sid: SymbolId) -> Option<&Symbol> {
96 self.symbols_by_id.get(sid)
97 }
98
99 /// Returns true if the provided symbol ID maps to an entry in the symbol table (i.e. it is in
100 /// the range of known symbols: 0 to max_id)
101 ///
102 /// Note that a symbol ID can be valid but map to unknown text. If a symbol table contains
103 /// a null or non-string value, that entry in the table will be defined but not have text
104 /// associated with it.
105 ///
106 /// This method allows users to distinguish between a SID with unknown text and a SID that is
107 /// invalid.
108 pub fn sid_is_valid(&self, sid: SymbolId) -> bool {
109 sid < self.symbols_by_id.len()
110 }
111
112 /// Returns a slice of references to the symbol text stored in the table.
113 ///
114 /// The symbol table can contain symbols with unknown text; see the documentation for
115 /// [Symbol] for more information.
116 pub fn symbols(&self) -> &[Symbol] {
117 &self.symbols_by_id
118 }
119
120 /// Returns a slice of references to the symbol text stored in the table starting at the given
121 /// symbol ID. If a symbol table append occurs during reading, this function can be used to
122 /// easily view the new symbols that has been added to the table.
123 ///
124 /// The symbol table can contain symbols with unknown text; see the documentation for
125 /// [Symbol] for more information.
126 pub fn symbols_tail(&self, start: usize) -> &[Symbol] {
127 &self.symbols_by_id[start..]
128 }
129
130 /// Returns the number of symbols defined in the table.
131 pub fn len(&self) -> usize {
132 self.symbols_by_id.len()
133 }
134}