ion_rs/
symbol_table.rs

1use std::collections::HashMap;
2use std::sync::Arc;
3
4use crate::constants::v1_0;
5use crate::types::{Symbol, SymbolId};
6
7/// Stores mappings from Symbol IDs to text and vice-versa.
8// SymbolTable instances always have at least system symbols; they are never empty.
9#[allow(clippy::len_without_is_empty)]
10pub struct SymbolTable {
11    symbols_by_id: Vec<Symbol>,
12    ids_by_text: HashMap<Symbol, SymbolId>,
13}
14
15impl Default for SymbolTable {
16    fn default() -> Self {
17        Self::new()
18    }
19}
20
21impl SymbolTable {
22    /// Constructs a new symbol table pre-populated with the system symbols defined in the spec.
23    pub fn new() -> SymbolTable {
24        let mut symbol_table = SymbolTable {
25            symbols_by_id: Vec::with_capacity(v1_0::SYSTEM_SYMBOLS.len()),
26            ids_by_text: HashMap::new(),
27        };
28        symbol_table.initialize();
29        symbol_table
30    }
31
32    // Interns the v1.0 system symbols
33    fn initialize(&mut self) {
34        for &text in v1_0::SYSTEM_SYMBOLS.iter() {
35            self.intern_or_add_placeholder(text);
36        }
37    }
38
39    pub fn reset(&mut self) {
40        self.symbols_by_id.clear();
41        self.ids_by_text.clear();
42        self.initialize();
43    }
44
45    /// If `text` is already in the symbol table, returns the corresponding [SymbolId].
46    /// Otherwise, adds `text` to the symbol table and returns the newly assigned [SymbolId].
47    pub fn intern<A: AsRef<str>>(&mut self, text: A) -> SymbolId {
48        let text = text.as_ref();
49        // If the text is already in the symbol table, return the ID associated with it.
50        if let Some(id) = self.ids_by_text.get(text) {
51            return *id;
52        }
53
54        // Otherwise, intern it and return the new ID.
55        let id = self.symbols_by_id.len();
56        let arc: Arc<str> = Arc::from(text);
57        let symbol = Symbol::shared(arc);
58        self.symbols_by_id.push(symbol.clone());
59        self.ids_by_text.insert(symbol, id);
60        id
61    }
62
63    /// Assigns unknown text to the next available symbol ID. This is used when an Ion reader
64    /// encounters null or non-string values in a stream's symbol table.
65    pub fn add_placeholder(&mut self) -> SymbolId {
66        let sid = self.symbols_by_id.len();
67        self.symbols_by_id.push(Symbol::unknown_text());
68        sid
69    }
70
71    /// If `maybe_text` is `Some(text)`, this method is equivalent to `intern(text)`.
72    /// If `maybe_text` is `None`, this method is equivalent to `add_placeholder()`.
73    pub fn intern_or_add_placeholder<A: AsRef<str>>(&mut self, maybe_text: Option<A>) -> SymbolId {
74        match maybe_text {
75            Some(text) => self.intern(text),
76            None => self.add_placeholder(),
77        }
78    }
79
80    /// If defined, returns the Symbol ID associated with the provided text.
81    pub fn sid_for<A: AsRef<str>>(&self, text: &A) -> Option<SymbolId> {
82        self.ids_by_text.get(text.as_ref()).copied()
83    }
84
85    /// If defined, returns the text associated with the provided Symbol ID.
86    pub fn text_for(&self, sid: SymbolId) -> Option<&str> {
87        self.symbols_by_id
88            // If the SID is out of bounds, returns None
89            .get(sid)?
90            // If the text is unknown, returns None
91            .text()
92    }
93
94    /// If defined, returns the Symbol associated with the provided Symbol ID.
95    pub fn symbol_for(&self, sid: SymbolId) -> Option<&Symbol> {
96        self.symbols_by_id.get(sid)
97    }
98
99    /// Returns true if the provided symbol ID maps to an entry in the symbol table (i.e. it is in
100    /// the range of known symbols: 0 to max_id)
101    ///
102    /// Note that a symbol ID can be valid but map to unknown text. If a symbol table contains
103    /// a null or non-string value, that entry in the table will be defined but not have text
104    /// associated with it.
105    ///
106    /// This method allows users to distinguish between a SID with unknown text and a SID that is
107    /// invalid.
108    pub fn sid_is_valid(&self, sid: SymbolId) -> bool {
109        sid < self.symbols_by_id.len()
110    }
111
112    /// Returns a slice of references to the symbol text stored in the table.
113    ///
114    /// The symbol table can contain symbols with unknown text; see the documentation for
115    /// [Symbol] for more information.
116    pub fn symbols(&self) -> &[Symbol] {
117        &self.symbols_by_id
118    }
119
120    /// Returns a slice of references to the symbol text stored in the table starting at the given
121    /// symbol ID. If a symbol table append occurs during reading, this function can be used to
122    /// easily view the new symbols that has been added to the table.
123    ///
124    /// The symbol table can contain symbols with unknown text; see the documentation for
125    /// [Symbol] for more information.
126    pub fn symbols_tail(&self, start: usize) -> &[Symbol] {
127        &self.symbols_by_id[start..]
128    }
129
130    /// Returns the number of symbols defined in the table.
131    pub fn len(&self) -> usize {
132        self.symbols_by_id.len()
133    }
134}