/// Builds the identifier string for one chunk of an entity.
///
/// The result has the shape `<entity_type>::<entity_id>::chunk_<chunk_index>`,
/// e.g. `User::123::chunk_0`. The inputs are inserted verbatim; no escaping or
/// validation is applied to `entity_type` or `entity_id`.
pub fn generate_vector_id(entity_type: &str, entity_id: &str, chunk_index: usize) -> String {
    format!(
        "{kind}::{id}::chunk_{index}",
        kind = entity_type,
        id = entity_id,
        index = chunk_index,
    )
}
7
/// Returns the current time as a `u64` timestamp.
///
/// * On `wasm` targets the value comes straight from `ic_cdk::api::time()`
///   (documented by the IC CDK as nanoseconds since the Unix epoch).
/// * On every other target it is the number of nanoseconds elapsed since
///   `UNIX_EPOCH` according to the system clock.
///
/// # Panics
///
/// On non-wasm targets, panics if the system clock reports a time earlier
/// than the Unix epoch.
pub fn get_timestamp() -> u64 {
    #[cfg(target_family = "wasm")]
    fn now_nanos() -> u64 {
        ic_cdk::api::time()
    }

    #[cfg(not(target_family = "wasm"))]
    fn now_nanos() -> u64 {
        use std::time::{SystemTime, UNIX_EPOCH};

        let since_epoch = SystemTime::now().duration_since(UNIX_EPOCH).unwrap();
        // `as_nanos` yields a u128; the cast keeps the low 64 bits.
        since_epoch.as_nanos() as u64
    }

    now_nanos()
}
24
/// Normalizes whitespace in `text`.
///
/// Every run of Unicode whitespace (spaces, tabs, newlines, ...) is collapsed
/// into a single ASCII space, and leading/trailing whitespace is removed.
/// An input that is empty or whitespace-only yields an empty string.
pub fn sanitize_text(text: &str) -> String {
    let mut cleaned = String::new();
    for word in text.split_whitespace() {
        // `split_whitespace` never yields empty pieces, so an empty buffer
        // means this is the first word and needs no separator.
        if !cleaned.is_empty() {
            cleaned.push(' ');
        }
        cleaned.push_str(word);
    }
    cleaned
}
31
/// Shortens `text` to at most `max_len` bytes and appends `"..."` when
/// anything was cut off.
///
/// * If `text` already fits (`text.len() <= max_len`), it is returned
///   unchanged and no ellipsis is added.
/// * Otherwise the first `max_len` bytes are kept. If `max_len` falls in the
///   middle of a multi-byte UTF-8 character, the cut moves back to the start
///   of that character, so the kept prefix may be slightly shorter than
///   `max_len` but is always valid UTF-8.
///
/// `max_len` is measured in bytes, not characters, and does not include the
/// three bytes of the appended ellipsis.
pub fn truncate_text(text: &str, max_len: usize) -> String {
    if text.len() <= max_len {
        return text.to_string();
    }

    // Slicing at a non-boundary index would panic, so back off to the nearest
    // preceding char boundary. Index 0 is always a boundary, which guarantees
    // the loop terminates without underflow.
    let mut cut = max_len;
    while !text.is_char_boundary(cut) {
        cut -= 1;
    }

    format!("{}...", &text[..cut])
}
40
/// Renders a byte count as a human-readable size string.
///
/// The value is divided by 1024 until it drops below 1024 or the largest
/// supported unit (`TB`) is reached, then printed with two decimal places
/// followed by a space and the unit, e.g. `1536` becomes `"1.50 KB"`.
/// Values of 1024 TB or more are still reported in `TB`.
pub fn format_bytes(bytes: u64) -> String {
    const UNITS: &[&str] = &["B", "KB", "MB", "GB", "TB"];

    let mut value = bytes as f64;
    let mut unit = UNITS[0];

    // Step up one unit at a time; stopping when the list is exhausted is what
    // caps the result at the last unit.
    for next_unit in &UNITS[1..] {
        if value < 1024.0 {
            break;
        }
        value /= 1024.0;
        unit = next_unit;
    }

    format!("{:.2} {}", value, unit)
}
54
#[cfg(test)]
mod tests {
    use super::*;

    /// The ID joins type, entity id and chunk index with `::` separators.
    #[test]
    fn test_generate_vector_id() {
        assert_eq!(generate_vector_id("User", "123", 0), "User::123::chunk_0");
    }

    /// Newlines and surrounding spaces collapse into single spaces.
    #[test]
    fn test_sanitize_text() {
        assert_eq!(sanitize_text("Hello world\n\n test"), "Hello world test");
    }

    /// Long input is cut and gets an ellipsis; short input is returned as-is.
    #[test]
    fn test_truncate_text() {
        let sample = "Hello world";

        let shortened = truncate_text(sample, 5);
        assert_eq!(shortened, "Hello...");

        let untouched = truncate_text(sample, 100);
        assert_eq!(untouched, "Hello world");
    }

    /// Sizes are scaled by 1024 and printed with two decimals and a unit.
    #[test]
    fn test_format_bytes() {
        let cases: [(u64, &str); 3] = [
            (512, "512.00 B"),
            (1536, "1.50 KB"),
            (1_048_576, "1.00 MB"),
        ];

        for (input, expected) in cases {
            assert_eq!(format_bytes(input), expected);
        }
    }
}