use reovim_driver_codec::{CodecError, CodecMetadata, ContentCodec, ContentType, DecodeResult};
/// Byte sequence (`EF BB BF`) checked for at the start of input by `decode`
/// and written at the start of output by `encode` when the BOM flag is set.
const UTF8_BOM: &[u8] = &[0xEF, 0xBB, 0xBF];
/// Metadata key set to `"true"` by `decode` when the input started with the
/// UTF-8 BOM; `encode` re-emits the BOM when it reads that value back.
pub const META_BOM: &str = "bom";
/// Metadata key under which `decode` records the detected line-ending style
/// ([`LINE_ENDING_CRLF`] or [`LINE_ENDING_LF`]).
pub const META_LINE_ENDING: &str = "line_ending";
/// [`META_LINE_ENDING`] value recorded when the decoded text contained at
/// least one `\r\n` pair; makes `encode` write every `\n` as `\r\n`.
pub const LINE_ENDING_CRLF: &str = "crlf";
/// [`META_LINE_ENDING`] value recorded when the decoded text contained no
/// `\r\n` pair.
pub const LINE_ENDING_LF: &str = "lf";
/// Stateless codec for UTF-8 text.
///
/// The type carries no data; all behavior lives in its trait implementations.
/// Being a zero-sized unit struct, it is cheap to copy and safe to print.
#[derive(Debug, Clone, Copy)]
pub struct Utf8Codec;

impl Utf8Codec {
    /// Creates a new codec instance.
    ///
    /// Usable in `const` contexts; the returned value holds no state.
    #[must_use]
    pub const fn new() -> Self {
        Self
    }
}
#[cfg_attr(coverage_nightly, coverage(off))]
impl Default for Utf8Codec {
fn default() -> Self {
Self::new()
}
}
impl ContentCodec for Utf8Codec {
    /// Decodes `raw` as UTF-8 text.
    ///
    /// * A leading UTF-8 byte-order mark is stripped; when present,
    ///   [`META_BOM`] is set to `"true"` in the returned metadata.
    /// * Every `\r\n` pair is rewritten to `\n`. [`META_LINE_ENDING`] is set to
    ///   [`LINE_ENDING_CRLF`] if at least one pair was found, otherwise to
    ///   [`LINE_ENDING_LF`].
    ///
    /// The result is never marked `lossy` or `readonly` and carries no
    /// annotations.
    ///
    /// # Errors
    ///
    /// Returns [`CodecError::InvalidSequence`] when the bytes following the
    /// optional BOM are not valid UTF-8. Both the `offset` field and the
    /// position mentioned in `detail` are measured from the start of `raw`
    /// (i.e. they include the BOM bytes when one was stripped).
    fn decode(&self, raw: &[u8]) -> Result<DecodeResult, CodecError> {
        let (has_bom, content_bytes) = match raw.strip_prefix(UTF8_BOM) {
            Some(rest) => (true, rest),
            None => (false, raw),
        };
        // Number of bytes skipped at the front of `raw` (0 or the BOM length).
        let bom_len = raw.len() - content_bytes.len();

        let text = std::str::from_utf8(content_bytes).map_err(|e| {
            // `valid_up_to` is relative to `content_bytes`; shift it so the
            // reported position points into the caller's original buffer.
            let offset = bom_len + e.valid_up_to();
            CodecError::InvalidSequence {
                offset,
                detail: format!("invalid UTF-8 at byte {}", offset),
            }
        })?;

        // A single CRLF pair classifies the whole input as CRLF, so input that
        // mixes `\r\n` and bare `\n` will be written back with `\r\n` only.
        let has_crlf = text.contains("\r\n");
        let content = if has_crlf {
            text.replace("\r\n", "\n")
        } else {
            text.to_string()
        };

        let mut metadata = CodecMetadata::new(ContentType::new(ContentType::UTF8));
        if has_bom {
            metadata.set(META_BOM, "true");
        }
        metadata.set(
            META_LINE_ENDING,
            if has_crlf {
                LINE_ENDING_CRLF
            } else {
                LINE_ENDING_LF
            },
        );

        Ok(DecodeResult {
            content,
            annotations: vec![],
            metadata,
            lossy: false,
            readonly: false,
        })
    }

    /// Encodes `content` back to bytes, honoring the flags in `metadata`.
    ///
    /// * If [`META_LINE_ENDING`] equals [`LINE_ENDING_CRLF`], every `\n` in
    ///   `content` is written as `\r\n`; otherwise the text is written as is.
    /// * If [`META_BOM`] equals `"true"`, the UTF-8 byte-order mark is
    ///   prepended.
    ///
    /// This implementation always returns `Some(Ok(bytes))`.
    fn encode(
        &self,
        content: &str,
        metadata: &CodecMetadata,
    ) -> Option<Result<Vec<u8>, CodecError>> {
        // Borrow the input on the LF path instead of copying it: the text is
        // copied into `bytes` below anyway.
        let text = if metadata.get(META_LINE_ENDING) == Some(LINE_ENDING_CRLF) {
            std::borrow::Cow::Owned(content.replace('\n', "\r\n"))
        } else {
            std::borrow::Cow::Borrowed(content)
        };

        let bom: &[u8] = if metadata.get(META_BOM) == Some("true") {
            UTF8_BOM
        } else {
            &[]
        };

        let mut bytes = Vec::with_capacity(bom.len() + text.len());
        bytes.extend_from_slice(bom);
        bytes.extend_from_slice(text.as_bytes());
        Some(Ok(bytes))
    }
}
// Unit tests are kept in the sibling file `codec_tests.rs` and are compiled
// only for test builds.
#[cfg(test)]
#[path = "codec_tests.rs"]
mod tests;