contrag_core/
utils.rs

//! Utility functions for ContRAG
2
/// Build the identifier under which one chunk's vector is stored.
///
/// The three components are joined with `::`, and the chunk index is
/// prefixed with `chunk_`, giving `<entity_type>::<entity_id>::chunk_<n>`
/// (for example `User::123::chunk_0`).
///
/// # Arguments
/// * `entity_type` - first component of the ID, copied verbatim.
/// * `entity_id` - second component of the ID, copied verbatim.
/// * `chunk_index` - index rendered in decimal after the `chunk_` prefix.
///
/// # Returns
/// A newly allocated `String` holding the composed ID. The inputs are not
/// escaped, so any `::` they contain appears unchanged in the result.
pub fn generate_vector_id(entity_type: &str, entity_id: &str, chunk_index: usize) -> String {
    let chunk_label = format!("chunk_{}", chunk_index);
    [entity_type, entity_id, chunk_label.as_str()].join("::")
}
7
/// Get the current timestamp in nanoseconds.
///
/// * On `wasm` targets the value comes from `ic_cdk::api::time()`.
/// * On every other target the value is the number of nanoseconds elapsed
///   since the Unix epoch according to the system clock.
///
/// # Panics
/// On non-wasm targets, panics if the system clock reports a time earlier
/// than the Unix epoch.
pub fn get_timestamp() -> u64 {
    #[cfg(target_family = "wasm")]
    {
        ic_cdk::api::time()
    }

    #[cfg(not(target_family = "wasm"))]
    {
        use std::time::{SystemTime, UNIX_EPOCH};

        let since_epoch = SystemTime::now()
            .duration_since(UNIX_EPOCH)
            .expect("system clock is set before the Unix epoch");

        // `as_nanos` yields a u128; saturate instead of silently wrapping if
        // the value ever exceeds what a u64 can hold.
        u64::try_from(since_epoch.as_nanos()).unwrap_or(u64::MAX)
    }
}
24
/// Normalize the whitespace of `text` before it is embedded.
///
/// The input is split on Unicode whitespace and the resulting words are
/// re-joined with a single ASCII space. Leading and trailing whitespace is
/// therefore dropped, and every internal run of whitespace (spaces, tabs,
/// newlines, ...) collapses to one space.
///
/// # Returns
/// A new `String`; it is empty when `text` is empty or whitespace-only.
pub fn sanitize_text(text: &str) -> String {
    let mut cleaned = String::with_capacity(text.len());
    for word in text.split_whitespace() {
        // Words are never empty, so the buffer is empty only before the
        // first word; every later word gets one separating space.
        if !cleaned.is_empty() {
            cleaned.push(' ');
        }
        cleaned.push_str(word);
    }
    cleaned
}
31
/// Truncate `text` to at most `max_len` bytes, appending `...` when cut.
///
/// If `text` already fits in `max_len` bytes it is returned unchanged.
/// Otherwise the text is cut and the marker `...` is appended; the marker is
/// not counted against `max_len`.
///
/// `max_len` is measured in bytes of the UTF-8 encoding. When it falls in
/// the middle of a multi-byte character, the cut moves back to the start of
/// that character so the result is always valid UTF-8 and the function never
/// panics.
///
/// # Arguments
/// * `text` - the text to shorten.
/// * `max_len` - maximum number of bytes of `text` to keep.
///
/// # Returns
/// A new `String` holding either the whole text or a prefix followed by `...`.
pub fn truncate_text(text: &str, max_len: usize) -> String {
    if text.len() <= max_len {
        return text.to_string();
    }

    // Slicing a `str` off a char boundary panics, so back off to the nearest
    // boundary at or before `max_len`. Index 0 is always a boundary, which
    // guarantees the loop terminates.
    let mut cut = max_len;
    while !text.is_char_boundary(cut) {
        cut -= 1;
    }

    format!("{}...", &text[..cut])
}
40
/// Render a byte count as a human-readable string.
///
/// The count is repeatedly divided by 1024 until it drops below 1024 or the
/// largest unit (`TB`) is reached, then printed with two decimal places
/// followed by a space and the unit label, e.g. `1536` -> `"1.50 KB"`.
///
/// # Arguments
/// * `bytes` - the size in bytes.
///
/// # Returns
/// A string of the form `"<value with 2 decimals> <B|KB|MB|GB|TB>"`.
pub fn format_bytes(bytes: u64) -> String {
    const UNITS: &[&str] = &["B", "KB", "MB", "GB", "TB"];
    const STEP: f64 = 1024.0;

    let mut value = bytes as f64;
    let mut label = UNITS[0];

    // Walk up through the larger units, stopping as soon as the value fits
    // the current one; running out of units leaves the value in TB.
    for &next_label in &UNITS[1..] {
        if value < STEP {
            break;
        }
        value /= STEP;
        label = next_label;
    }

    format!("{:.2} {}", value, label)
}
54
#[cfg(test)]
mod tests {
    use super::*;

    /// The ID is the three components joined by `::` with a `chunk_` prefix.
    #[test]
    fn test_generate_vector_id() {
        assert_eq!(generate_vector_id("User", "123", 0), "User::123::chunk_0");
    }

    /// Runs of spaces and newlines collapse to single spaces.
    #[test]
    fn test_sanitize_text() {
        let messy = "Hello    world\n\n  test";
        assert_eq!(sanitize_text(messy), "Hello world test");
    }

    /// Long text is cut and marked with `...`; short text is left alone.
    #[test]
    fn test_truncate_text() {
        let input = "Hello world";
        let cases = [(5, "Hello..."), (100, "Hello world")];
        for &(max_len, expected) in cases.iter() {
            assert_eq!(truncate_text(input, max_len), expected);
        }
    }

    /// Values are scaled by 1024 and printed with two decimals.
    #[test]
    fn test_format_bytes() {
        let cases = [
            (512, "512.00 B"),
            (1536, "1.50 KB"),
            (1_048_576, "1.00 MB"),
        ];
        for &(bytes, expected) in cases.iter() {
            assert_eq!(format_bytes(bytes), expected);
        }
    }
}