reovim_module_codec_utf8/
codec.rs1use reovim_driver_codec::{CodecError, CodecMetadata, ContentCodec, ContentType, DecodeResult};
7
/// Byte-order mark that may prefix UTF-8 encoded data (`EF BB BF`).
const UTF8_BOM: &[u8] = b"\xEF\xBB\xBF";

/// Metadata key under which the presence of a BOM is recorded.
pub const META_BOM: &str = "bom";

/// Metadata key under which the detected line-ending style is recorded.
pub const META_LINE_ENDING: &str = "line_ending";

/// [`META_LINE_ENDING`] value for `\r\n` line endings.
pub const LINE_ENDING_CRLF: &str = "crlf";

/// [`META_LINE_ENDING`] value for `\n` line endings.
pub const LINE_ENDING_LF: &str = "lf";
22
/// Codec for UTF-8 text.
///
/// The type carries no state; every value of it behaves the same.
pub struct Utf8Codec;

impl Utf8Codec {
    /// Builds a codec value. Usable in `const` contexts.
    #[must_use]
    pub const fn new() -> Self {
        Utf8Codec
    }
}
38
39#[cfg_attr(coverage_nightly, coverage(off))]
40impl Default for Utf8Codec {
41 fn default() -> Self {
42 Self::new()
43 }
44}
45
46impl ContentCodec for Utf8Codec {
47 fn decode(&self, raw: &[u8]) -> Result<DecodeResult, CodecError> {
48 let (has_bom, content_bytes) = if raw.starts_with(UTF8_BOM) {
50 (true, &raw[UTF8_BOM.len()..])
51 } else {
52 (false, raw)
53 };
54
55 let text = std::str::from_utf8(content_bytes).map_err(|e| CodecError::InvalidSequence {
57 offset: e.valid_up_to() + if has_bom { UTF8_BOM.len() } else { 0 },
58 detail: format!("invalid UTF-8 at byte {}", e.valid_up_to()),
59 })?;
60
61 let has_crlf = text.contains("\r\n");
63 let content = if has_crlf {
64 text.replace("\r\n", "\n")
65 } else {
66 text.to_string()
67 };
68
69 let mut metadata = CodecMetadata::new(ContentType::new(ContentType::UTF8));
71 if has_bom {
72 metadata.set(META_BOM, "true");
73 }
74 if has_crlf {
75 metadata.set(META_LINE_ENDING, LINE_ENDING_CRLF);
76 } else {
77 metadata.set(META_LINE_ENDING, LINE_ENDING_LF);
78 }
79
80 Ok(DecodeResult {
81 content,
82 annotations: vec![],
83 metadata,
84 lossy: false,
85 readonly: false,
86 })
87 }
88
89 fn encode(
90 &self,
91 content: &str,
92 metadata: &CodecMetadata,
93 ) -> Option<Result<Vec<u8>, CodecError>> {
94 let text = if metadata.get(META_LINE_ENDING) == Some(LINE_ENDING_CRLF) {
96 content.replace('\n', "\r\n")
97 } else {
98 content.to_string()
99 };
100
101 let mut bytes = Vec::with_capacity(text.len() + 3);
103 if metadata.get(META_BOM) == Some("true") {
104 bytes.extend_from_slice(UTF8_BOM);
105 }
106 bytes.extend_from_slice(text.as_bytes());
107
108 Some(Ok(bytes))
109 }
110}
111
112#[cfg(test)]
113#[path = "codec_tests.rs"]
114mod tests;