1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88
//! The `text` module provides the `Text` type, which is not a MIDI-specific concept. MIDI
//! recommends that any text be encoded as ASCII, but there is no enforcement. We provide a
//! `Text` type that holds a UTF-8 `String` whenever possible, but falls back to holding the
//! raw bytes when they are not valid UTF-8.
use log::warn;
use std::borrow::Cow;
use std::fmt::{Display, Formatter};
/// A piece of text whose encoding is not guaranteed.
///
/// The MIDI spec does not say which encoding strings should use. Because Rust strings are
/// UTF-8, the bytes are interpreted as UTF-8 when that succeeds. When it does not, the
/// original bytes are kept untouched so that nothing is lost.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum Text {
    /// Text that is valid UTF-8.
    Utf8(String),
    /// Bytes that are not valid UTF-8, most likely a string in some other encoding.
    Other(Vec<u8>),
}
impl Default for Text {
fn default() -> Self {
Text::Utf8(String::new())
}
}
impl Display for Text {
    /// Writes the text to `f`.
    ///
    /// UTF-8 text is written as-is. Non-UTF-8 bytes are converted lossily, with every invalid
    /// sequence replaced by U+FFFD. Both variants are forwarded through `Display::fmt` so that
    /// formatter options such as width, fill and alignment apply to either one.
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        match self {
            Text::Utf8(s) => Display::fmt(s, f),
            // `write!(f, "{}", ..)` would start a fresh format spec and silently drop the
            // caller's padding options, so delegate to `Display::fmt` instead.
            Text::Other(b) => Display::fmt(&String::from_utf8_lossy(b), f),
        }
    }
}
impl From<Vec<u8>> for Text {
    /// Builds a `Text` from raw bytes.
    ///
    /// Valid UTF-8 becomes `Text::Utf8`. Anything else is logged as a warning and stored
    /// unchanged in `Text::Other`.
    fn from(bytes: Vec<u8>) -> Self {
        // `from_utf8` takes ownership and hands the buffer back inside the error, so no copy of
        // the input is needed on either path.
        match String::from_utf8(bytes) {
            Ok(s) => Text::Utf8(s),
            Err(e) => {
                warn!("non UTF-8 string encountered, encoding unknown");
                Text::Other(e.into_bytes())
            }
        }
    }
}
impl From<String> for Text {
fn from(s: String) -> Self {
Text::Utf8(s)
}
}
impl From<&str> for Text {
fn from(s: &str) -> Self {
Text::Utf8(s.into())
}
}
/// Converts a `Text` into a `String`.
///
/// Caution: the conversion is lossy when the `Text` is not UTF-8 encoded, because invalid byte
/// sequences are replaced with U+FFFD.
impl From<Text> for String {
    fn from(t: Text) -> Self {
        match t {
            Text::Other(raw) => String::from_utf8_lossy(&raw).into_owned(),
            Text::Utf8(text) => text,
        }
    }
}
impl Text {
pub fn new<S: Into<String>>(s: S) -> Self {
Text::Utf8(s.into())
}
pub fn as_bytes(&self) -> &[u8] {
match self {
Text::Utf8(s) => s.as_bytes(),
Text::Other(b) => b.as_slice(),
}
}
pub fn as_str(&self) -> Cow<'_, str> {
match self {
Text::Utf8(s) => Cow::Borrowed(s.as_str()),
Text::Other(b) => String::from_utf8_lossy(b),
}
}
}