1use crate::ensure;
19use crate::error::Error;
20use crate::meta::string_util;
21use std::sync::OnceLock;
22
23const SHORT_MAX_VALUE: usize = 32767;
25pub static NAMESPACE_ENCODER: MetaStringEncoder = MetaStringEncoder::new('.', '_');
28pub static TYPE_NAME_ENCODER: MetaStringEncoder = MetaStringEncoder::new('$', '_');
29pub static FIELD_NAME_ENCODER: MetaStringEncoder = MetaStringEncoder::new('$', '_');
30
31pub static NAMESPACE_DECODER: MetaStringDecoder = MetaStringDecoder::new('.', '_');
32pub static FIELD_NAME_DECODER: MetaStringDecoder = MetaStringDecoder::new('$', '_');
33pub static TYPE_NAME_DECODER: MetaStringDecoder = MetaStringDecoder::new('$', '_');
34
/// Identifies how the bytes of a [`MetaString`] were produced.
///
/// The enum is `#[repr(i16)]` with explicit discriminants, so each variant can
/// be cast to its numeric tag with `as i16`.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, Hash)]
#[repr(i16)]
pub enum Encoding {
    /// Plain UTF-8 bytes. This is the default variant.
    #[default]
    Utf8 = 0,
    /// Packed 5 bits per character: `a`-`z` plus `.`, `_`, `$` and `|`.
    LowerSpecial = 1,
    /// Packed 6 bits per character: `a`-`z`, `A`-`Z`, `0`-`9` plus the two
    /// special characters configured on the encoder/decoder.
    LowerUpperDigitSpecial = 2,
    /// Like [`Encoding::LowerSpecial`], but the first character is lowercased
    /// before packing and uppercased again when decoding.
    FirstToLowerSpecial = 3,
    /// Like [`Encoding::LowerSpecial`], but every uppercase letter is written
    /// as `|` followed by its lowercase form.
    AllToLowerSpecial = 4,
}
45
46#[derive(Debug, Clone, Default)]
47pub struct MetaString {
48 pub original: String,
49 pub encoding: Encoding,
50 pub bytes: Vec<u8>,
51 pub strip_last_char: bool,
52 pub special_char1: char,
53 pub special_char2: char,
54}
55
56impl PartialEq for MetaString {
57 fn eq(&self, other: &Self) -> bool {
58 self.bytes == other.bytes
59 }
60}
61
62impl Eq for MetaString {}
63
64impl std::hash::Hash for MetaString {
65 fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
66 self.bytes.hash(state);
67 }
68}
69
70static EMPTY: OnceLock<MetaString> = OnceLock::new();
71
72impl MetaString {
73 pub fn new(
74 original: String,
75 encoding: Encoding,
76 bytes: Vec<u8>,
77 special_char1: char,
78 special_char2: char,
79 ) -> Result<Self, Error> {
80 let mut strip_last_char = false;
81 if encoding != Encoding::Utf8 {
82 if bytes.is_empty() {
83 return Err(Error::encode_error("Encoded data cannot be empty"));
84 }
85 strip_last_char = (bytes[0] & 0x80) != 0;
86 }
87 Ok(MetaString {
88 original,
89 encoding,
90 bytes,
91 strip_last_char,
92 special_char1,
93 special_char2,
94 })
95 }
96
97 pub fn write_to(&self, writer: &mut crate::buffer::Writer) {
98 writer.write_var_uint32(self.bytes.len() as u32);
99 writer.write_bytes(&self.bytes);
100 }
101
102 pub fn get_empty() -> &'static MetaString {
103 EMPTY.get_or_init(|| MetaString {
104 original: "".to_string(),
105 encoding: Encoding::default(),
106 bytes: vec![],
107 strip_last_char: false,
108 special_char1: '\0',
109 special_char2: '\0',
110 })
111 }
112}
113
/// Decoder configuration: the two characters that the 6-bit values 62 and 63
/// map back to.
#[derive(Clone)]
pub struct MetaStringDecoder {
    /// Character produced for the 6-bit value 62.
    pub special_char1: char,
    /// Character produced for the 6-bit value 63.
    pub special_char2: char,
}
119
/// Encoder configuration: the two extra characters accepted by the 6-bit
/// encoding, stored as the values 62 and 63.
#[derive(Clone)]
pub struct MetaStringEncoder {
    /// Character encoded as the 6-bit value 62.
    pub special_char1: char,
    /// Character encoded as the 6-bit value 63.
    pub special_char2: char,
}
125
/// Character statistics gathered in one pass over an input string and used to
/// pick an encoding.
#[derive(Debug)]
struct StringStatistics {
    /// How many digits the input contains.
    digit_count: usize,
    /// How many uppercase letters the input contains.
    upper_count: usize,
    /// `true` when every character is a letter, a digit, or one of the
    /// encoder's two special characters.
    can_lower_upper_digit_special_encoded: bool,
    /// `true` when every character is a lowercase letter or one of
    /// `.`, `_`, `$`, `|`.
    can_lower_special_encoded: bool,
}
133
134impl MetaStringEncoder {
135 pub const fn new(special_char1: char, special_char2: char) -> Self {
136 Self {
137 special_char1,
138 special_char2,
139 }
140 }
141
142 fn is_latin(&self, s: &str) -> bool {
143 string_util::is_latin(s)
144 }
145
146 fn _encode(&self, input: &str) -> Result<Option<MetaString>, Error> {
147 if input.is_empty() {
148 return Ok(Some(MetaString::new(
149 input.to_string(),
150 Encoding::Utf8,
151 vec![],
152 self.special_char1,
153 self.special_char2,
154 )?));
155 }
156
157 ensure!(
158 input.len() < SHORT_MAX_VALUE,
159 Error::encode_error(format!(
160 "Meta string is too long, max:{SHORT_MAX_VALUE}, current:{}",
161 input.len()
162 ))
163 );
164
165 if !self.is_latin(input) {
166 return Ok(Some(MetaString::new(
167 input.to_string(),
168 Encoding::Utf8,
169 input.as_bytes().to_vec(),
170 self.special_char1,
171 self.special_char2,
172 )?));
173 }
174
175 Ok(None)
176 }
177
178 pub fn encode(&self, input: &str) -> Result<MetaString, Error> {
179 if let Some(ms) = self._encode(input)? {
180 return Ok(ms);
181 }
182 let encoding = self.compute_encoding(input, None);
183 self.encode_with_encoding(input, encoding)
184 }
185
186 pub fn encode_with_encodings(
187 &self,
188 input: &str,
189 encodings: &[Encoding],
190 ) -> Result<MetaString, Error> {
191 if let Some(ms) = self._encode(input)? {
192 return Ok(ms);
193 }
194 let encoding = self.compute_encoding(input, Some(encodings));
195 self.encode_with_encoding(input, encoding)
196 }
197
198 fn compute_encoding(&self, input: &str, encodings: Option<&[Encoding]>) -> Encoding {
199 let allow = |e: Encoding| encodings.map_or(true, |opts| opts.contains(&e));
200 let statistics = self.compute_statistics(input);
201 if statistics.can_lower_special_encoded && allow(Encoding::LowerSpecial) {
202 return Encoding::LowerSpecial;
203 }
204 if statistics.can_lower_upper_digit_special_encoded {
205 if statistics.digit_count != 0 && allow(Encoding::LowerUpperDigitSpecial) {
206 return Encoding::LowerUpperDigitSpecial;
207 }
208 let upper_count: usize = statistics.upper_count;
209 if upper_count == 1
210 && input.chars().next().unwrap().is_uppercase()
211 && allow(Encoding::FirstToLowerSpecial)
212 {
213 return Encoding::FirstToLowerSpecial;
214 }
215 if ((input.len() + upper_count) * 5) < (input.len() * 6)
216 && allow(Encoding::AllToLowerSpecial)
217 {
218 return Encoding::AllToLowerSpecial;
219 }
220 if allow(Encoding::LowerUpperDigitSpecial) {
221 return Encoding::LowerUpperDigitSpecial;
222 }
223 }
224 Encoding::Utf8
225 }
226
227 fn compute_statistics(&self, chars: &str) -> StringStatistics {
228 let mut can_lower_upper_digit_special_encoded = true;
229 let mut can_lower_special_encoded = true;
230 let mut digit_count = 0;
231 let mut upper_count = 0;
232 for c in chars.chars() {
233 if can_lower_upper_digit_special_encoded
234 && !(c.is_lowercase()
235 || c.is_uppercase()
236 || c.is_ascii_digit()
237 || (c == self.special_char1 || c == self.special_char2))
238 {
239 can_lower_upper_digit_special_encoded = false;
240 }
241 if can_lower_special_encoded
242 && !(c.is_lowercase() || matches!(c, '.' | '_' | '$' | '|'))
243 {
244 can_lower_special_encoded = false;
245 }
246 if c.is_ascii_digit() {
247 digit_count += 1;
248 }
249 if c.is_uppercase() {
250 upper_count += 1;
251 }
252 }
253 StringStatistics {
254 digit_count,
255 upper_count,
256 can_lower_upper_digit_special_encoded,
257 can_lower_special_encoded,
258 }
259 }
260
261 pub fn encode_with_encoding(
262 &self,
263 input: &str,
264 encoding: Encoding,
265 ) -> Result<MetaString, Error> {
266 if input.is_empty() {
267 return MetaString::new(
268 input.to_string(),
269 Encoding::Utf8,
270 vec![],
271 self.special_char1,
272 self.special_char2,
273 );
274 }
275 ensure!(
276 input.len() < SHORT_MAX_VALUE,
277 Error::encode_error(format!(
278 "Meta string is too long, max:{SHORT_MAX_VALUE}, current:{}",
279 input.len()
280 ))
281 );
282 ensure!(
283 encoding == Encoding::Utf8 || self.is_latin(input),
284 Error::encode_error("Non-ASCII characters in meta string are not allowed")
285 );
286
287 if input.is_empty() {
288 return MetaString::new(
289 input.to_string(),
290 Encoding::Utf8,
291 vec![],
292 self.special_char1,
293 self.special_char2,
294 );
295 };
296
297 match encoding {
298 Encoding::LowerSpecial => {
299 let encoded_data = self.encode_lower_special(input)?;
300 MetaString::new(
301 input.to_string(),
302 encoding,
303 encoded_data,
304 self.special_char1,
305 self.special_char2,
306 )
307 }
308 Encoding::LowerUpperDigitSpecial => {
309 let encoded_data = self.encode_lower_upper_digit_special(input)?;
310 MetaString::new(
311 input.to_string(),
312 encoding,
313 encoded_data,
314 self.special_char1,
315 self.special_char2,
316 )
317 }
318 Encoding::FirstToLowerSpecial => {
319 let encoded_data = self.encode_first_to_lower_special(input)?;
320 MetaString::new(
321 input.to_string(),
322 encoding,
323 encoded_data,
324 self.special_char1,
325 self.special_char2,
326 )
327 }
328 Encoding::AllToLowerSpecial => {
329 let upper_count = input.chars().filter(|c| c.is_uppercase()).count();
330 let encoded_data = self.encode_all_to_lower_special(input, upper_count)?;
331 MetaString::new(
332 input.to_string(),
333 encoding,
334 encoded_data,
335 self.special_char1,
336 self.special_char2,
337 )
338 }
339 Encoding::Utf8 => {
340 let encoded_data = input.as_bytes().to_vec();
341 MetaString::new(
342 input.to_string(),
343 Encoding::Utf8,
344 encoded_data,
345 self.special_char1,
346 self.special_char2,
347 )
348 }
349 }
350 }
351
352 fn encode_generic(&self, input: &str, bits_per_char: u8) -> Result<Vec<u8>, Error> {
353 let total_bits: usize = input.len() * bits_per_char as usize + 1;
354 let byte_length: usize = (total_bits + 7) / 8;
355 let mut bytes = vec![0; byte_length];
356 let mut current_bit = 1;
357 for c in input.chars() {
358 let value = self.char_to_value(c, bits_per_char)?;
359 for i in (0..bits_per_char).rev() {
360 if (value & (1 << i)) != 0 {
361 let byte_pos: usize = current_bit / 8;
362 let bit_pos: usize = current_bit % 8;
363 bytes[byte_pos] |= 1 << (7 - bit_pos);
364 }
365 current_bit += 1;
366 }
367 }
368 if byte_length * 8 >= total_bits + bits_per_char as usize {
369 bytes[0] |= 0x80;
370 }
371 Ok(bytes)
372 }
373 pub fn encode_lower_special(&self, input: &str) -> Result<Vec<u8>, Error> {
374 self.encode_generic(input, 5)
375 }
376
377 pub fn encode_lower_upper_digit_special(&self, input: &str) -> Result<Vec<u8>, Error> {
378 self.encode_generic(input, 6)
379 }
380
381 pub fn encode_first_to_lower_special(&self, input: &str) -> Result<Vec<u8>, Error> {
382 if input.is_empty() {
383 return self.encode_generic("", 5);
384 }
385
386 let mut iter = input.char_indices();
387 let (first_idx, first_char) = iter.next().unwrap();
388
389 let lower = first_char.to_lowercase().to_string();
390
391 if lower.len() == first_char.len_utf8() && first_char.is_ascii() {
394 let mut bytes = input.as_bytes().to_owned();
395 bytes[first_idx] = lower.as_bytes()[0];
396 return self.encode_generic(std::str::from_utf8(&bytes).unwrap(), 5);
397 }
398
399 let (_, rest) = input.split_at(first_char.len_utf8());
401 let mut result = String::with_capacity(input.len() + lower.len() - first_char.len_utf8());
402 result.push_str(&lower);
403 result.push_str(rest);
404 self.encode_generic(&result, 5)
405 }
406
407 pub fn encode_all_to_lower_special(
408 &self,
409 input: &str,
410 upper_count: usize,
411 ) -> Result<Vec<u8>, Error> {
412 let mut new_chars = Vec::with_capacity(input.len() + upper_count);
413 for c in input.chars() {
414 if c.is_uppercase() {
415 new_chars.push('|');
416 new_chars.push(c.to_lowercase().next().unwrap());
417 } else {
418 new_chars.push(c);
419 }
420 }
421 self.encode_generic(&new_chars.iter().collect::<String>(), 5)
422 }
423
424 fn char_to_value(&self, c: char, bits_per_char: u8) -> Result<u8, Error> {
425 match bits_per_char {
426 5 => match c {
427 'a'..='z' => Ok(c as u8 - b'a'),
428 '.' => Ok(26),
429 '_' => Ok(27),
430 '$' => Ok(28),
431 '|' => Ok(29),
432 _ => Err(Error::encode_error(format!(
433 "Unsupported character for LOWER_UPPER_DIGIT_SPECIAL encoding: {c}",
434 )))?,
435 },
436 6 => match c {
437 'a'..='z' => Ok(c as u8 - b'a'),
438 'A'..='Z' => Ok(c as u8 - b'A' + 26),
439 '0'..='9' => Ok(c as u8 - b'0' + 52),
440 _ => {
441 if c == self.special_char1 {
442 Ok(62)
443 } else if c == self.special_char2 {
444 Ok(63)
445 } else {
446 Err(Error::encode_error(format!(
447 "Invalid character value for LOWER_SPECIAL decoding: {c:?}",
448 )))?
449 }
450 }
451 },
452 _ => unreachable!(),
453 }
454 }
455}
456
457impl MetaStringDecoder {
458 pub const fn new(special_char1: char, special_char2: char) -> Self {
459 MetaStringDecoder {
460 special_char1,
461 special_char2,
462 }
463 }
464
465 pub fn decode(&self, encoded_data: &[u8], encoding: Encoding) -> Result<MetaString, Error> {
466 let str = {
467 if encoded_data.is_empty() {
468 Ok("".to_string())
469 } else {
470 match encoding {
471 Encoding::LowerSpecial => self.decode_lower_special(encoded_data),
472 Encoding::LowerUpperDigitSpecial => {
473 self.decode_lower_upper_digit_special(encoded_data)
474 }
475 Encoding::FirstToLowerSpecial => {
476 self.decode_rep_first_lower_special(encoded_data)
477 }
478 Encoding::AllToLowerSpecial => {
479 self.decode_rep_all_to_lower_special(encoded_data)
480 }
481 Encoding::Utf8 => Ok(String::from_utf8_lossy(encoded_data).into_owned()),
482 }
483 }
484 }?;
485 MetaString::new(
486 str,
487 encoding,
488 Vec::from(encoded_data),
489 self.special_char1,
490 self.special_char2,
491 )
492 }
493
494 fn decode_lower_special(&self, data: &[u8]) -> Result<String, Error> {
495 let mut decoded = String::new();
496 let total_bits: usize = data.len() * 8;
497 let strip_last_char = (data[0] & 0x80) != 0;
498 let bit_mask: usize = 0b11111;
499 let mut bit_index = 1;
500 while bit_index + 5 <= total_bits && !(strip_last_char && (bit_index + 2 * 5 > total_bits))
501 {
502 let byte_index = bit_index / 8;
503 let intra_byte_index = bit_index % 8;
504 let char_value: usize = if intra_byte_index > 3 {
505 ((((data[byte_index] as usize) << 8)
506 | if byte_index + 1 < data.len() {
507 data.get(byte_index + 1).cloned().unwrap() as usize & 0xFF
508 } else {
509 0
510 })
511 >> (11 - intra_byte_index))
512 & bit_mask
513 } else {
514 ((data[byte_index] as usize) >> (3 - intra_byte_index)) & bit_mask
515 };
516 bit_index += 5;
517 decoded.push(self.decode_lower_special_char(char_value as u8)?);
518 }
519 Ok(decoded)
520 }
521
522 fn decode_lower_upper_digit_special(&self, data: &[u8]) -> Result<String, Error> {
523 let mut decoded = String::new();
524 let num_bits = data.len() * 8;
525 let strip_last_char = (data[0] & 0x80) != 0;
526 let mut bit_index = 1;
527 let bit_mask: usize = 0b111111;
528 while bit_index + 6 <= num_bits && !(strip_last_char && (bit_index + 2 * 6 > num_bits)) {
529 let byte_index = bit_index / 8;
530 let intra_byte_index = bit_index % 8;
531 let char_value: usize = if intra_byte_index > 2 {
532 ((((data[byte_index] as usize) << 8)
533 | if byte_index + 1 < data.len() {
534 data.get(byte_index + 1).cloned().unwrap() as usize & 0xFF
535 } else {
536 0
537 })
538 >> (10 - intra_byte_index))
539 & bit_mask
540 } else {
541 ((data[byte_index] as usize) >> (2 - intra_byte_index)) & bit_mask
542 };
543 bit_index += 6;
544 decoded.push(self.decode_lower_upper_digit_special_char(char_value as u8)?);
545 }
546 Ok(decoded)
547 }
548
549 fn decode_lower_special_char(&self, char_value: u8) -> Result<char, Error> {
550 match char_value {
551 0..=25 => Ok((b'a' + char_value) as char),
552 26 => Ok('.'),
553 27 => Ok('_'),
554 28 => Ok('$'),
555 29 => Ok('|'),
556 _ => Err(Error::encode_error(format!(
557 "Invalid character value for LOWER_SPECIAL decoding: {char_value}",
558 )))?,
559 }
560 }
561
562 fn decode_lower_upper_digit_special_char(&self, char_value: u8) -> Result<char, Error> {
563 match char_value {
564 0..=25 => Ok((b'a' + char_value) as char),
565 26..=51 => Ok((b'A' + char_value - 26) as char),
566 52..=61 => Ok((b'0' + char_value - 52) as char),
567 62 => Ok(self.special_char1),
568 63 => Ok(self.special_char2),
569 _ => Err(Error::encode_error(format!(
570 "Invalid character value for LOWER_UPPER_DIGIT_SPECIAL decoding: {char_value}",
571 )))?,
572 }
573 }
574
575 fn decode_rep_first_lower_special(&self, data: &[u8]) -> Result<String, Error> {
576 let decoded_str = self.decode_lower_special(data)?;
577 let mut chars = decoded_str.chars();
578 match chars.next() {
579 Some(first_char) => {
580 let mut result = first_char.to_ascii_uppercase().to_string();
581 result.extend(chars);
582 Ok(result)
583 }
584 None => Ok(decoded_str),
585 }
586 }
587 fn decode_rep_all_to_lower_special(&self, data: &[u8]) -> Result<String, Error> {
588 let decoded_str = self.decode_lower_special(data)?;
589 let mut result = String::new();
590 let mut skip = false;
591 for (i, char) in decoded_str.chars().enumerate() {
592 if skip {
593 skip = false;
594 continue;
595 }
596 if char == '|' {
599 if let Some(next_char) = decoded_str.chars().nth(i + 1) {
600 result.push(next_char.to_ascii_uppercase());
601 }
602 skip = true;
603 } else {
604 result.push(char);
605 }
606 }
607 Ok(result)
608 }
609}