Skip to main content

reifydb_type/value/blob/
utf8.rs

1// SPDX-License-Identifier: MIT
2// Copyright (c) 2025 ReifyDB
3
4use super::Blob;
5use crate::{
6	error::{BlobEncodingKind, Error, TypeError},
7	fragment::Fragment,
8};
9
10impl Blob {
11	pub fn from_utf8(fragment: Fragment) -> Self {
12		let fragment = fragment;
13		let utf8_str = fragment.text();
14		Blob::new(utf8_str.as_bytes().to_vec())
15	}
16
17	pub fn to_utf8(&self) -> Result<String, Error> {
18		match str::from_utf8(self.as_bytes()) {
19			Ok(s) => Ok(s.to_string()),
20			Err(e) => Err(TypeError::BlobEncoding {
21				kind: BlobEncodingKind::InvalidUtf8Sequence {
22					error: e.to_string(),
23				},
24				message: format!("Invalid UTF-8 sequence in BLOB: {}", e),
25				fragment: Fragment::internal(e.to_string()),
26			}
27			.into()),
28		}
29	}
30
31	pub fn to_utf8_lossy(&self) -> String {
32		String::from_utf8_lossy(self.as_bytes()).to_string()
33	}
34
35	pub fn from_str(fragment: Fragment) -> Self {
36		Self::from_utf8(fragment)
37	}
38}
39
// Unit tests for the UTF-8 conversions on `Blob`.
#[cfg(test)]
pub mod tests {
	use super::*;
	use crate::fragment::Fragment;

	/// ASCII text is stored byte-for-byte.
	#[test]
	fn test_from_utf8() {
		let blob = Blob::from_utf8(Fragment::testing("Hello, World!"));
		assert_eq!(blob.as_bytes(), b"Hello, World!");
	}

	/// Multi-byte characters (CJK and an emoji) are stored as their UTF-8 bytes.
	#[test]
	fn test_from_utf8_unicode() {
		let blob = Blob::from_utf8(Fragment::testing("Hello, 世界! 🦀"));
		assert_eq!(blob.as_bytes(), "Hello, 世界! 🦀".as_bytes());
	}

	/// An empty fragment yields an empty blob.
	#[test]
	fn test_from_utf8_empty() {
		let blob = Blob::from_utf8(Fragment::testing(""));
		assert_eq!(blob.as_bytes(), b"");
	}

	/// Valid UTF-8 bytes decode back to the original text.
	#[test]
	fn test_to_utf8() {
		let blob = Blob::new("Hello, 世界!".as_bytes().to_vec());
		assert_eq!(blob.to_utf8().unwrap(), "Hello, 世界!");
	}

	/// 0xFF and 0xFE can never appear in well-formed UTF-8, so strict decoding fails.
	#[test]
	fn test_to_utf8_invalid() {
		let blob = Blob::new(vec![0xFF, 0xFE]);
		assert!(blob.to_utf8().is_err());
	}

	/// Lossy decoding passes valid text through and marks invalid bytes.
	#[test]
	fn test_to_utf8_lossy() {
		let blob = Blob::new("Hello, 世界!".as_bytes().to_vec());
		assert_eq!(blob.to_utf8_lossy(), "Hello, 世界!");

		let invalid_blob = Blob::new(vec![0xFF, 0xFE]);
		let lossy = invalid_blob.to_utf8_lossy();
		// U+FFFD REPLACEMENT CHARACTER, written as an escape so the literal
		// survives any re-encoding of this source file.
		assert!(lossy.contains('\u{FFFD}'));
	}

	/// `from_str` behaves like `from_utf8`.
	#[test]
	fn test_from_str() {
		let blob = Blob::from_str(Fragment::testing("Hello!"));
		assert_eq!(blob.as_bytes(), b"Hello!");
	}

	/// Encoding then strictly decoding returns the original text.
	#[test]
	fn test_utf8_roundtrip() {
		let original = "Hello, 世界! 🦀 Test with emojis and unicode";
		let blob = Blob::from_utf8(Fragment::testing(original));
		let decoded = blob.to_utf8().unwrap();
		assert_eq!(decoded, original);
	}
}