Skip to main content

protobuf/
string.rs

1// Protocol Buffers - Google's data interchange format
2// Copyright 2023 Google LLC.  All rights reserved.
3//
4// Use of this source code is governed by a BSD-style
5// license that can be found in the LICENSE file or at
6// https://developers.google.com/open-source/licenses/bsd
7
8//! Items specific to `bytes` and `string` fields.
9#![allow(dead_code)]
10#![allow(unused)]
11
12use crate::__internal::runtime::InnerProtoString;
13use crate::__internal::{Private, SealedInternal};
14use crate::{AsView, IntoProxied, IntoView, MapKey, Mut, MutProxied, Proxied, View};
15use std::borrow::Cow;
16use std::cmp::{Eq, Ord, Ordering, PartialEq, PartialOrd};
17use std::convert::{AsMut, AsRef};
18use std::ffi::{OsStr, OsString};
19use std::fmt;
20use std::hash::{Hash, Hasher};
21use std::iter;
22use std::ops::{Deref, DerefMut};
23use std::ptr;
24use std::rc::Rc;
25use std::sync::Arc;
26
27pub struct ProtoBytes {
28    pub(crate) inner: InnerProtoString,
29}
30
31impl ProtoBytes {
32    // Returns the kernel-specific container. This method is private in spirit and
33    // must not be called by a user.
34    #[doc(hidden)]
35    pub fn into_inner(self, _private: Private) -> InnerProtoString {
36        self.inner
37    }
38
39    #[doc(hidden)]
40    pub fn from_inner(_private: Private, inner: InnerProtoString) -> ProtoBytes {
41        Self { inner }
42    }
43
44    pub fn as_view(&self) -> &[u8] {
45        self.inner.as_bytes()
46    }
47}
48
49impl AsRef<[u8]> for ProtoBytes {
50    fn as_ref(&self) -> &[u8] {
51        self.inner.as_bytes()
52    }
53}
54
55impl From<&[u8]> for ProtoBytes {
56    fn from(v: &[u8]) -> ProtoBytes {
57        ProtoBytes { inner: InnerProtoString::from(v) }
58    }
59}
60
61impl<const N: usize> From<&[u8; N]> for ProtoBytes {
62    fn from(v: &[u8; N]) -> ProtoBytes {
63        ProtoBytes { inner: InnerProtoString::from(v.as_ref()) }
64    }
65}
66
67impl SealedInternal for ProtoBytes {}
68
69impl Proxied for ProtoBytes {
70    type View<'msg> = &'msg [u8];
71}
72
73impl AsView for ProtoBytes {
74    type Proxied = Self;
75
76    fn as_view(&self) -> &[u8] {
77        self.as_view()
78    }
79}
80
81impl IntoProxied<ProtoBytes> for &[u8] {
82    fn into_proxied(self, _private: Private) -> ProtoBytes {
83        ProtoBytes::from(self)
84    }
85}
86
87impl<const N: usize> IntoProxied<ProtoBytes> for &[u8; N] {
88    fn into_proxied(self, _private: Private) -> ProtoBytes {
89        ProtoBytes::from(self.as_ref())
90    }
91}
92
93impl IntoProxied<ProtoBytes> for Vec<u8> {
94    fn into_proxied(self, _private: Private) -> ProtoBytes {
95        ProtoBytes::from(AsRef::<[u8]>::as_ref(&self))
96    }
97}
98
99impl IntoProxied<ProtoBytes> for &Vec<u8> {
100    fn into_proxied(self, _private: Private) -> ProtoBytes {
101        ProtoBytes::from(AsRef::<[u8]>::as_ref(self))
102    }
103}
104
105impl IntoProxied<ProtoBytes> for Box<[u8]> {
106    fn into_proxied(self, _private: Private) -> ProtoBytes {
107        ProtoBytes::from(AsRef::<[u8]>::as_ref(&self))
108    }
109}
110
111impl IntoProxied<ProtoBytes> for Cow<'_, [u8]> {
112    fn into_proxied(self, _private: Private) -> ProtoBytes {
113        ProtoBytes::from(AsRef::<[u8]>::as_ref(&self))
114    }
115}
116
117impl IntoProxied<ProtoBytes> for Rc<[u8]> {
118    fn into_proxied(self, _private: Private) -> ProtoBytes {
119        ProtoBytes::from(AsRef::<[u8]>::as_ref(&self))
120    }
121}
122
123impl IntoProxied<ProtoBytes> for Arc<[u8]> {
124    fn into_proxied(self, _private: Private) -> ProtoBytes {
125        ProtoBytes::from(AsRef::<[u8]>::as_ref(&self))
126    }
127}
128
129impl SealedInternal for &[u8] {}
130
131impl AsView for &[u8] {
132    type Proxied = ProtoBytes;
133
134    fn as_view(&self) -> &[u8] {
135        self
136    }
137}
138
139impl<'msg> IntoView<'msg> for &'msg [u8] {
140    fn into_view<'shorter>(self) -> &'shorter [u8]
141    where
142        'msg: 'shorter,
143    {
144        self
145    }
146}
147
/// The bytes were not valid UTF-8.
///
/// Thin wrapper around [`std::str::Utf8Error`]. Its `Display` output is exactly the
/// wrapped error's message.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Utf8Error {
    pub(crate) inner: std::str::Utf8Error,
}

impl std::fmt::Display for Utf8Error {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Fully qualified: the wrapped error implements both `Debug` and `Display`, and a
        // bare `.fmt(f)` call depends on which of those traits happens to be in scope.
        std::fmt::Display::fmt(&self.inner, f)
    }
}

impl std::error::Error for Utf8Error {}

/// Enables `?` on `std::str::from_utf8(..)` results in functions returning this error.
impl From<std::str::Utf8Error> for Utf8Error {
    fn from(inner: std::str::Utf8Error) -> Utf8Error {
        Utf8Error { inner }
    }
}
166
167/// An owned type representing protobuf `string` field's contents.
168///
169/// # UTF-8
170///
171/// Protobuf intends to maintain the invariant that a `string` fields are UTF-8 encoded text, and
172/// by default the validity of the UTF-8 encoding is enforced at parse time.
173///
174/// However, the Rust implementation is designed to zero-copy integrate with C++Proto. C++Proto is
175/// designed such that string fields should be valid UTF-8, and generally the validity is checked at
176/// parse time, but it is not undefined behavior to set malformed UTF-8 data. For the reason,
177/// RustProto uses a 'should-be-UTF-8' types, but it is not considered undefined behavior to set
178/// arbitrary &[u8] onto a string field.
179///
180/// Doing so should be done with great caution however, as it can lead to difficult to debug
181/// issues and problems in downstream systems.
182///
183///
184/// `ProtoString` represents a string type that is expected to contain valid
185/// UTF-8. However, `ProtoString` is not validated, so users must
186/// call [`ProtoString::to_string`] to perform a (possibly runtime-elided) UTF-8
187/// validation check. This validation should rarely fail in pure Rust programs,
188/// but is necessary to prevent UB when interacting with C++, or other languages
189/// with looser restrictions.
190///
191///
192/// # `Display` and `ToString`
193/// `ProtoString` is ordinarily UTF-8 and so implements `Display`. If there are
194/// any invalid UTF-8 sequences, they are replaced with [`U+FFFD REPLACEMENT
195/// CHARACTER`]. Because anything implementing `Display` also implements
196/// `ToString`, `ProtoString::to_string()` is equivalent to
197/// `String::from_utf8_lossy(proto_string.as_bytes()).into_owned()`.
198///
199/// [`U+FFFD REPLACEMENT CHARACTER`]: std::char::REPLACEMENT_CHARACTER
200pub struct ProtoString {
201    pub(crate) inner: InnerProtoString,
202}
203
204impl ProtoString {
205    pub fn as_view(&self) -> &ProtoStr {
206        ProtoStr::from_utf8_unchecked(self.as_bytes())
207    }
208
209    pub fn as_bytes(&self) -> &[u8] {
210        self.inner.as_bytes()
211    }
212
213    /// Converts bytes to a `ProtoString` without a check. Prefer using `.try_into()`
214    /// where possible.
215    ///
216    /// The input `bytes` should be valid UTF-8. Note that unlike with `str` this
217    /// method is not unsafe, as the underlying implementations are robust against
218    /// invalid UTF-8 and this will not result in language undefined behavior.
219    ///
220    /// However, `string` fields are intended to maintain the invariant that they
221    /// contain valid UTF-8, and the system behavior if invalid UTF-8 is contained may be
222    /// poor, including that that you could end up storing malformed data which is not parsable.
223    pub fn from_utf8_unchecked(v: &[u8]) -> Self {
224        Self { inner: InnerProtoString::from(v) }
225    }
226
227    // Returns the kernel-specific container. This method is private in spirit and
228    // must not be called by a user.
229    #[doc(hidden)]
230    pub fn into_inner(self, _private: Private) -> InnerProtoString {
231        self.inner
232    }
233
234    #[doc(hidden)]
235    pub fn from_inner(_private: Private, inner: InnerProtoString) -> ProtoString {
236        Self { inner }
237    }
238}
239
240impl SealedInternal for ProtoString {}
241
242impl Deref for ProtoString {
243    type Target = ProtoStr;
244
245    fn deref(&self) -> &Self::Target {
246        self.as_view()
247    }
248}
249
250impl AsRef<[u8]> for ProtoString {
251    fn as_ref(&self) -> &[u8] {
252        self.inner.as_bytes()
253    }
254}
255
256impl From<ProtoString> for ProtoBytes {
257    fn from(v: ProtoString) -> Self {
258        ProtoBytes { inner: v.inner }
259    }
260}
261
262impl From<&str> for ProtoString {
263    fn from(v: &str) -> Self {
264        Self::from_utf8_unchecked(v.as_bytes())
265    }
266}
267
268impl TryFrom<&[u8]> for ProtoString {
269    type Error = Utf8Error;
270
271    fn try_from(v: &[u8]) -> Result<Self, Self::Error> {
272        let s = std::str::from_utf8(v)?;
273        Ok(ProtoString::from(s))
274    }
275}
276
277impl SealedInternal for &str {}
278
279impl SealedInternal for &ProtoStr {}
280
281impl IntoProxied<ProtoString> for &str {
282    fn into_proxied(self, _private: Private) -> ProtoString {
283        ProtoString::from(self)
284    }
285}
286
287impl IntoProxied<ProtoString> for &ProtoStr {
288    fn into_proxied(self, _private: Private) -> ProtoString {
289        ProtoString::from_utf8_unchecked(self.as_bytes())
290    }
291}
292
293impl IntoProxied<ProtoString> for String {
294    fn into_proxied(self, _private: Private) -> ProtoString {
295        ProtoString::from(self.as_str())
296    }
297}
298
299impl IntoProxied<ProtoString> for &String {
300    fn into_proxied(self, _private: Private) -> ProtoString {
301        ProtoString::from_utf8_unchecked(self.as_bytes())
302    }
303}
304
305impl IntoProxied<ProtoString> for OsString {
306    fn into_proxied(self, private: Private) -> ProtoString {
307        self.as_os_str().into_proxied(private)
308    }
309}
310
311impl IntoProxied<ProtoString> for &OsStr {
312    fn into_proxied(self, _private: Private) -> ProtoString {
313        ProtoString::from_utf8_unchecked(self.as_encoded_bytes())
314    }
315}
316
317impl IntoProxied<ProtoString> for Box<str> {
318    fn into_proxied(self, _private: Private) -> ProtoString {
319        ProtoString::from(AsRef::<str>::as_ref(&self))
320    }
321}
322
323impl IntoProxied<ProtoString> for Cow<'_, str> {
324    fn into_proxied(self, _private: Private) -> ProtoString {
325        ProtoString::from(AsRef::<str>::as_ref(&self))
326    }
327}
328
329impl IntoProxied<ProtoString> for Rc<str> {
330    fn into_proxied(self, _private: Private) -> ProtoString {
331        ProtoString::from(AsRef::<str>::as_ref(&self))
332    }
333}
334
335impl IntoProxied<ProtoString> for Arc<str> {
336    fn into_proxied(self, _private: Private) -> ProtoString {
337        ProtoString::from(AsRef::<str>::as_ref(&self))
338    }
339}
340
341/// A shared immutable view of a protobuf `string` field's contents.
342///
343/// Like a `str`, it can be cheaply accessed as bytes and
344/// is dynamically sized, requiring it be accessed through a pointer.
345///
346/// # UTF-8 and `&str` access
347///
348/// Protobuf [docs] state that a `string` field contains UTF-8 encoded text.
349/// However, not every runtime enforces this, and the Rust runtime is designed
350/// to integrate with other runtimes with FFI, like C++.
351///
352/// Because of this, in order to access the contents as a `&str`, users must
353/// call [`ProtoStr::to_str`] to perform a (possibly runtime-elided) UTF-8
354/// validation check. However, the Rust API only allows `set()`ting a `string`
355/// field with data should be valid UTF-8 like a `&str` or a
356/// `&ProtoStr`. This means that this check should rarely fail, but is necessary
357/// to prevent UB when interacting with C++, which has looser restrictions.
358///
359/// Most of the time, users should not perform direct `&str` access to the
360/// contents - this type implements `Display` and comparison with `str`,
361/// so it's best to avoid a UTF-8 check by working directly with `&ProtoStr`
362/// or converting to `&[u8]`.
363///
364/// # `Display` and `ToString`
365/// `ProtoStr` is ordinarily UTF-8 and so implements `Display`. If there are
366/// any invalid UTF-8 sequences, they are replaced with [`U+FFFD REPLACEMENT
367/// CHARACTER`]. Because anything implementing `Display` also implements
368/// `ToString`, `proto_str.to_string()` is equivalent to
369/// `String::from_utf8_lossy(proto_str.as_bytes()).into_owned()`.
370///
371/// [docs]: https://protobuf.dev/programming-guides/proto2/#scalar
372/// [dst]: https://doc.rust-lang.org/reference/dynamically-sized-types.html
373/// [`U+FFFD REPLACEMENT CHARACTER`]: std::char::REPLACEMENT_CHARACTER
374#[repr(transparent)]
375pub struct ProtoStr([u8]);
376
377impl ProtoStr {
378    /// Converts `self` to a byte slice.
379    ///
380    /// Note: this type does not implement `Deref`; you must call `as_bytes()`
381    /// or `AsRef<[u8]>` to get access to bytes.
382    pub const fn as_bytes(&self) -> &[u8] {
383        &self.0
384    }
385
386    /// Yields a `&str` slice if `self` contains valid UTF-8.
387    ///
388    /// This may perform a runtime check, dependent on runtime.
389    ///
390    /// `String::from_utf8_lossy(proto_str.as_bytes())` can be used to
391    /// infallibly construct a string, replacing invalid UTF-8 with
392    /// [`U+FFFD REPLACEMENT CHARACTER`].
393    ///
394    /// [`U+FFFD REPLACEMENT CHARACTER`]: std::char::REPLACEMENT_CHARACTER
395    // This is not `try_to_str` since `to_str` is shorter, with `CStr` as precedent.
396    pub const fn to_str(&self) -> Result<&str, Utf8Error> {
397        // Note: cannot use `?` here because of the `const` context.
398        match std::str::from_utf8(&self.0) {
399            Ok(s) => Ok(s),
400            Err(e) => Err(Utf8Error { inner: e }),
401        }
402    }
403
404    /// Converts `self` to a string, including invalid characters.
405    ///
406    /// Invalid UTF-8 sequences are replaced with
407    /// [`U+FFFD REPLACEMENT CHARACTER`].
408    ///
409    /// Users should be prefer this to `.to_string()` provided by `Display`.
410    /// `.to_cow_lossy()` is the same operation, but it may avoid an
411    /// allocation if the string is already UTF-8.
412    ///
413    /// [`U+FFFD REPLACEMENT CHARACTER`]: std::char::REPLACEMENT_CHARACTER
414    //
415    // This method is named `to_string_lossy` in `CStr`, but since `to_string`
416    // also exists on this type, this name was chosen to avoid confusion.
417    pub fn to_cow_lossy(&self) -> Cow<'_, str> {
418        String::from_utf8_lossy(&self.0)
419    }
420
421    /// Returns `true` if `self` has a length of zero bytes.
422    pub const fn is_empty(&self) -> bool {
423        self.0.is_empty()
424    }
425
426    /// Returns the length of `self`.
427    ///
428    /// Like `&str`, this is a length in bytes, not `char`s or graphemes.
429    pub const fn len(&self) -> usize {
430        self.0.len()
431    }
432
433    /// Converts bytes to a `&ProtoStr` without a check. Prefer using `.try_into()`
434    /// where possible.
435    ///
436    /// The input `bytes` should be valid UTF-8. Note that unlike with `str` this
437    /// method is not unsafe, as the underlying implementations are robust against
438    /// invalid UTF-8 and this will not result in language undefined behavior.
439    ///
440    /// However, `string` fields are intended to maintain the invariant that they
441    /// contain valid UTF-8, and the system behavior if invalid UTF-8 is contained may be
442    /// poor, including that that you could end up storing malformed data which is not parsable.
443    pub const fn from_utf8_unchecked(bytes: &[u8]) -> &Self {
444        // SAFETY:
445        // - `ProtoStr` is `#[repr(transparent)]` over `[u8]`, so it has the same
446        //   layout.
447        // - `ProtoStr` has the same pointer metadata and element size as `[u8]`.
448        unsafe { &*(bytes as *const [u8] as *const Self) }
449    }
450
451    /// Interprets a string slice as a `&ProtoStr`.
452    pub const fn from_str(string: &str) -> &Self {
453        Self::from_utf8_unchecked(string.as_bytes())
454    }
455
456    pub const fn is_ascii(&self) -> bool {
457        self.0.is_ascii()
458    }
459
460    pub fn contains<T>(&self, other: &T) -> bool
461    where
462        T: AsRef<[u8]> + ?Sized,
463    {
464        let other = other.as_ref();
465        if other.is_empty() {
466            return true;
467        }
468        // Note: this sliding window approach is suboptimal, but simple and correct and can be
469        // optimized later if needed.
470        self.0.windows(other.len()).any(|window| window == other)
471    }
472
473    pub fn starts_with<T>(&self, other: &T) -> bool
474    where
475        T: AsRef<[u8]> + ?Sized,
476    {
477        self.0.starts_with(other.as_ref())
478    }
479
480    pub fn ends_with<T>(&self, other: &T) -> bool
481    where
482        T: AsRef<[u8]> + ?Sized,
483    {
484        self.0.ends_with(other.as_ref())
485    }
486
487    pub fn find<T>(&self, other: &T) -> Option<usize>
488    where
489        T: AsRef<[u8]> + ?Sized,
490    {
491        let other = other.as_ref();
492        if other.is_empty() {
493            return Some(0);
494        }
495        // Note: this sliding window approach is suboptimal, but simple and correct and can be
496        // optimized later if needed.
497        self.0.windows(other.len()).position(|window| window == other)
498    }
499
500    pub const fn trim_ascii(&self) -> &Self {
501        Self::from_utf8_unchecked(self.0.trim_ascii())
502    }
503
504    pub const fn trim_ascii_start(&self) -> &Self {
505        Self::from_utf8_unchecked(self.0.trim_ascii_start())
506    }
507
508    pub const fn trim_ascii_end(&self) -> &Self {
509        Self::from_utf8_unchecked(self.0.trim_ascii_end())
510    }
511}
512
513impl AsRef<[u8]> for ProtoStr {
514    fn as_ref(&self) -> &[u8] {
515        self.as_bytes()
516    }
517}
518
519impl<'msg> From<&'msg ProtoStr> for &'msg [u8] {
520    fn from(val: &'msg ProtoStr) -> &'msg [u8] {
521        val.as_bytes()
522    }
523}
524
525impl<'msg> From<&'msg str> for &'msg ProtoStr {
526    fn from(val: &'msg str) -> &'msg ProtoStr {
527        ProtoStr::from_str(val)
528    }
529}
530
531impl<'msg> TryFrom<&'msg ProtoStr> for &'msg str {
532    type Error = Utf8Error;
533
534    fn try_from(val: &'msg ProtoStr) -> Result<&'msg str, Utf8Error> {
535        val.to_str()
536    }
537}
538
539impl<'msg> TryFrom<&'msg [u8]> for &'msg ProtoStr {
540    type Error = Utf8Error;
541
542    fn try_from(val: &'msg [u8]) -> Result<&'msg ProtoStr, Utf8Error> {
543        let s = std::str::from_utf8(val)?;
544        Ok(ProtoStr::from_str(s))
545    }
546}
547
548impl fmt::Debug for ProtoStr {
549    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
550        write!(f, "\"");
551        for chunk in self.as_bytes().utf8_chunks() {
552            for ch in chunk.valid().chars() {
553                write!(f, "{}", ch.escape_debug());
554            }
555            for byte in chunk.invalid() {
556                // Format byte as \xff.
557                write!(f, "\\x{:02X}", byte);
558            }
559        }
560        write!(f, "\"");
561        Ok(())
562    }
563}
564
565impl fmt::Display for ProtoStr {
566    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
567        fmt::Display::fmt(&String::from_utf8_lossy(self.as_bytes()), f)?;
568        Ok(())
569    }
570}
571
572impl Hash for ProtoStr {
573    fn hash<H: Hasher>(&self, state: &mut H) {
574        self.as_bytes().hash(state)
575    }
576}
577
578impl Eq for ProtoStr {}
579impl Ord for ProtoStr {
580    fn cmp(&self, other: &ProtoStr) -> Ordering {
581        self.as_bytes().cmp(other.as_bytes())
582    }
583}
584
585impl Proxied for ProtoString {
586    type View<'msg> = &'msg ProtoStr;
587}
588
589impl MapKey for ProtoString {}
590
591impl AsView for ProtoString {
592    type Proxied = Self;
593
594    fn as_view(&self) -> &ProtoStr {
595        self.as_view()
596    }
597}
598
599impl AsView for &ProtoStr {
600    type Proxied = ProtoString;
601
602    fn as_view(&self) -> &ProtoStr {
603        self
604    }
605}
606
607impl<'msg> IntoView<'msg> for &'msg ProtoStr {
608    fn into_view<'shorter>(self) -> &'shorter ProtoStr
609    where
610        'msg: 'shorter,
611    {
612        self
613    }
614}
615
/// Implements `PartialOrd` and `PartialEq` for the `lhs` against the `rhs`
/// using `AsRef<[u8]>`.
///
/// Each entry has the form `<(generics)> Lhs => Rhs`; write `<()>` when the impl
/// needs no generic parameters. Both sides are compared bytewise through their
/// `AsRef<[u8]>` views, so both types must implement `AsRef<[u8]>`.
// TODO: consider improving to not require a `<()>` if no generics are
// needed
macro_rules! impl_bytes_partial_cmp {
    ($(<($($generics:tt)*)> $lhs:ty => $rhs:ty),+ $(,)?) => {
        $(
            impl<$($generics)*> PartialEq<$rhs> for $lhs {
                fn eq(&self, other: &$rhs) -> bool {
                    AsRef::<[u8]>::as_ref(self) == AsRef::<[u8]>::as_ref(other)
                }
            }
            impl<$($generics)*> PartialOrd<$rhs> for $lhs {
                fn partial_cmp(&self, other: &$rhs) -> Option<Ordering> {
                    AsRef::<[u8]>::as_ref(self).partial_cmp(AsRef::<[u8]>::as_ref(other))
                }
            }
        )*
    };
}

// Comparison impls generated for `ProtoStr`. The self-comparison entry also supplies
// the `PartialEq`/`PartialOrd` supertraits that the `Eq`/`Ord` impls on `ProtoStr` need.
impl_bytes_partial_cmp!(
    // `ProtoStr` against protobuf types
    <()> ProtoStr => ProtoStr,

    // `ProtoStr` against foreign types
    <()> ProtoStr => str,
    <()> str => ProtoStr,
);
645
646impl std::fmt::Debug for ProtoString {
647    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> Result<(), std::fmt::Error> {
648        std::fmt::Debug::fmt(self.as_view(), f)
649    }
650}
651
652impl std::fmt::Debug for ProtoBytes {
653    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> Result<(), std::fmt::Error> {
654        std::fmt::Debug::fmt(self.as_view(), f)
655    }
656}
657
// NOTE(review): these impls assert thread-safety by hand. Their soundness depends on
// `InnerProtoString`, which is defined outside this file — presumably it exclusively owns
// its buffer and has no interior mutability. TODO: confirm and record the argument here
// as a proper `// SAFETY:` comment.
unsafe impl Sync for ProtoString {}
unsafe impl Send for ProtoString {}

// NOTE(review): same assumption as for `ProtoString` above; `ProtoBytes` wraps the same
// `InnerProtoString` container. TODO: confirm.
unsafe impl Send for ProtoBytes {}
unsafe impl Sync for ProtoBytes {}
663
#[cfg(test)]
mod tests {
    use super::*;
    use googletest::prelude::*;

    // TODO: Extend coverage to the items not exercised below (`ProtoBytes`, the
    // `IntoProxied` conversions, `Debug`/`Display` output, and the comparison impls).

    // `TryFrom<&[u8]>` accepts valid UTF-8 and rejects invalid bytes.
    #[gtest]
    fn test_proto_string_try_from() -> googletest::Result<()> {
        let valid_utf8 = b"hello";
        let s = ProtoString::try_from(&valid_utf8[..])?;
        verify_eq!(s.as_bytes(), valid_utf8)?;

        let invalid_utf8 = b"\xff";
        let res = ProtoString::try_from(&invalid_utf8[..]);
        verify_that!(res, err(anything()))?;
        Ok(())
    }

    // The unchecked constructor stores invalid UTF-8 bytes unchanged.
    #[gtest]
    fn test_proto_string_from_utf8_unchecked() -> googletest::Result<()> {
        let invalid_utf8 = b"\xff";
        let s = ProtoString::from_utf8_unchecked(invalid_utf8);
        verify_eq!(s.as_bytes(), invalid_utf8)?;
        Ok(())
    }

    // Search and trim helpers on `ProtoStr`, with both `&ProtoStr` and `&str` needles.
    #[gtest]
    fn test_proto_str_methods() -> googletest::Result<()> {
        let s = ProtoStr::from_str("  hello world  ");

        // contains
        verify_eq!(s.contains(s), true)?;
        verify_eq!(s.contains("hello"), true)?;
        verify_eq!(s.contains("world"), true)?;
        verify_eq!(s.contains("o w"), true)?;
        verify_eq!(s.contains("xyz"), false)?;
        verify_eq!(s.contains(""), true)?;

        // starts_with / ends_with
        verify_eq!(s.starts_with(s), true)?;
        verify_eq!(s.ends_with(s), true)?;
        verify_eq!(s.starts_with("  he"), true)?;
        verify_eq!(s.ends_with("d  "), true)?;
        verify_eq!(s.starts_with("hel"), false)?;

        // find
        verify_eq!(s.find(s), Some(0))?;
        verify_eq!(s.find("hello"), Some(2))?;
        verify_eq!(s.find("world"), Some(8))?;
        verify_eq!(s.find("xyz"), None)?;
        verify_eq!(s.find(""), Some(0))?;

        // trim
        verify_eq!(s.trim_ascii(), "hello world")?;
        verify_eq!(s.trim_ascii_start(), "hello world  ")?;
        verify_eq!(s.trim_ascii_end(), "  hello world")?;

        Ok(())
    }

    // `ProtoString` reaches the `ProtoStr` methods through `Deref`.
    #[gtest]
    fn test_proto_string_deref() -> googletest::Result<()> {
        let s = ProtoString::from("  hello  ");
        verify_eq!(s.contains("hello"), true)?;
        verify_eq!(s.trim_ascii(), "hello")?;

        let s2 = ProtoStr::from_str("he");
        verify_eq!(s.contains(s2), true)?;

        let s2 = ProtoStr::from_str("world");
        verify_eq!(s.contains(s2), false)?;

        Ok(())
    }

    // Every `const fn` on `ProtoStr` is usable in a `const` initializer.
    #[gtest]
    fn test_const_proto_str() -> googletest::Result<()> {
        const S: &ProtoStr = ProtoStr::from_str("hello");
        verify_eq!(S.contains("hello"), true)?;

        const S_BYTES: &[u8] = S.as_bytes();
        verify_eq!(S_BYTES, b"hello")?;

        const S_TO_STR: core::result::Result<&str, Utf8Error> = S.to_str();
        verify_eq!(S_TO_STR.unwrap(), "hello")?;

        const S_IS_EMPTY: bool = S.is_empty();
        verify_eq!(S_IS_EMPTY, false)?;
        const EMPTY: &ProtoStr = ProtoStr::from_str("");
        const EMPTY_IS_EMPTY: bool = EMPTY.is_empty();
        verify_eq!(EMPTY_IS_EMPTY, true)?;

        const S_LEN: usize = S.len();
        verify_eq!(S_LEN, 5)?;

        const S_IS_ASCII: bool = S.is_ascii();
        verify_eq!(S_IS_ASCII, true)?;

        const TRIM_ME: &ProtoStr = ProtoStr::from_str("  foo  ");
        const TRIMMED: &ProtoStr = TRIM_ME.trim_ascii();
        const TRIMMED_START: &ProtoStr = TRIM_ME.trim_ascii_start();
        const TRIMMED_END: &ProtoStr = TRIM_ME.trim_ascii_end();
        verify_eq!(TRIMMED.as_bytes(), b"foo")?;
        verify_eq!(TRIMMED_START.as_bytes(), b"foo  ")?;
        verify_eq!(TRIMMED_END.as_bytes(), b"  foo")?;

        const S2: &ProtoStr = ProtoStr::from_utf8_unchecked(b"world");
        verify_eq!(S2.contains("world"), true)?;

        Ok(())
    }
}