Skip to main content

anda_engine/
lib.rs

1//! Runtime implementation for the Anda agent framework.
2//!
3//! `anda_engine` turns the traits and data contracts from `anda_core` into a
4//! runnable agent engine. It provides the execution context, model routing,
5//! storage, hooks, memory tools, remote engine integration, and built-in
6//! extensions used by Anda agents.
7//!
8//! # Main modules
9//! - [`engine`]: engine construction, agent execution, tool calls, and remote
10//!   engine metadata.
11//! - [`context`]: runtime contexts passed to agents and tools, including cache,
12//!   storage, HTTP, Web3, cancellation, and state features.
13//! - [`model`]: model provider adapters and label-based model routing.
14//! - [`extension`]: reusable tools such as filesystem (read, write, search,
15//!   edit), shell, fetch, notes, skills, and todos.
16//! - [`memory`]: persistent conversation and resource memory backed by AndaDB
17//!   and the Cognitive Nexus.
18//! - [`store`]: object storage abstraction used by engine contexts.
19
20use anda_core::Json;
21use candid::Principal;
22use chrono::prelude::*;
23use rand::RngExt;
24use unicode_segmentation::UnicodeSegmentation;
25
26pub mod context;
27pub mod engine;
28pub mod extension;
29pub mod hook;
30pub mod management;
31pub mod memory;
32pub mod model;
33pub mod store;
34pub mod subagent;
35
36/// Returns the current Unix timestamp in milliseconds.
37pub use structured_logger::unix_ms;
38
39/// Generates cryptographically secure random bytes.
40pub use ic_cose::rand_bytes;
41
42/// The anonymous principal, used to represent unauthenticated or anonymous users in the system.
43pub const ANONYMOUS: Principal = Principal::anonymous();
44
45/// User agent string used by Anda Engine HTTP clients.
///
/// Assembled at compile time with `concat!` as
/// `Mozilla/5.0 anda.bot <CARGO_PKG_NAME>/<CARGO_PKG_VERSION>`, where the two placeholders are
/// read from the corresponding build-time environment variables via `env!`.
46pub static APP_USER_AGENT: &str = concat!(
47    "Mozilla/5.0 anda.bot ",
48    env!("CARGO_PKG_NAME"),
49    "/",
50    env!("CARGO_PKG_VERSION"),
51);
52
53/// Generates a random number within the given range.
54pub fn rand_number<T, R>(range: R) -> T
55where
56    T: rand::distr::uniform::SampleUniform,
57    R: rand::distr::uniform::SampleRange<T>,
58{
59    let mut rng = rand::rng();
60    rng.random_range(range)
61}
62
63/// Returns the current UTC datetime in RFC 3339 format with millisecond precision.
64pub fn rfc3339_datetime_now() -> String {
65    Utc::now().to_rfc3339_opts(chrono::SecondsFormat::Millis, true)
66}
67
68/// Converts a Unix timestamp in milliseconds to an RFC 3339 UTC datetime string.
69pub fn rfc3339_datetime(now_ms: u64) -> Option<String> {
70    let datetime = DateTime::<Utc>::from_timestamp_millis(now_ms as i64);
71    datetime.map(|dt| dt.to_rfc3339_opts(chrono::SecondsFormat::Millis, true))
72}
73
74/// Sets the Unix timestamp in milliseconds for each JSON object in the vector.
75pub fn json_set_unix_ms_timestamp(mut vals: Vec<Json>, timestamp_ms: u64) -> Vec<Json> {
76    for val in vals.iter_mut() {
77        if let Some(obj) = val.as_object_mut() {
78            obj.insert("timestamp".into(), timestamp_ms.into());
79        }
80    }
81    vals
82}
83
84/// Converts each JSON object's `timestamp` field from Unix milliseconds to RFC 3339.
85pub fn json_convert_rfc3339_timestamp(mut vals: Vec<Json>) -> Vec<Json> {
86    for val in vals.iter_mut() {
87        if let Some(obj) = val.as_object_mut()
88            && let Some(timestamp_ms) = obj.get("timestamp").and_then(Json::as_u64)
89        {
90            obj.insert("timestamp".into(), rfc3339_datetime(timestamp_ms).into());
91        }
92    }
93    vals
94}
95
96/// Converts a Unix timestamp in milliseconds to a local datetime string in the format "YYYY-MM-DD HH(AM/PM) ±TZ".
97/// Example: 1970-01-01 08AM +08:00
98pub fn local_date_hour(now_ms: u64) -> Option<String> {
99    let local_datetime: Option<DateTime<Local>> =
100        DateTime::<Utc>::from_timestamp_millis(now_ms as i64).map(|d| d.with_timezone(&Local));
101    local_datetime.map(|dt| dt.format("%Y-%m-%d %I%p %:z").to_string())
102}
103
104/// Returns the largest byte length `<= max_bytes` that falls on a grapheme-cluster boundary of
105/// `text`, so multi-codepoint characters — emoji ZWJ sequences (e.g. 👨‍👩‍👧‍👦), regional-indicator
106/// flags, skin-tone modifiers, and combining marks — are never split.
107///
108/// The result is always a valid UTF-8 boundary. It is `text.len()` when the whole string fits, and
109/// `0` when not even the first grapheme cluster fits within `max_bytes`.
110pub fn grapheme_safe_cutoff(text: &str, max_bytes: usize) -> usize {
111    if text.len() <= max_bytes {
112        return text.len();
113    }
114
115    text.grapheme_indices(true)
116        .map(|(idx, g)| idx + g.len())
117        .take_while(|&end| end <= max_bytes)
118        .last()
119        .unwrap_or(0)
120}
121
122/// Truncates a UTF-8 string in place to at most `max_bytes`, backing off to the nearest
123/// grapheme-cluster boundary via [`grapheme_safe_cutoff`] so multi-codepoint characters are never
124/// split.
125///
126/// Returns the new byte length when truncation happened, or `None` when the string already fit
127/// within `max_bytes`. When not even the first grapheme cluster fits, the string is truncated to
128/// empty.
129pub fn truncate_utf8_to_max_bytes(text: &mut String, max_bytes: usize) -> Option<usize> {
130    if text.len() <= max_bytes {
131        return None;
132    }
133
134    let cutoff = grapheme_safe_cutoff(text, max_bytes);
135    text.truncate(cutoff);
136    Some(cutoff)
137}
138
#[cfg(test)]
mod tests {
    use super::*;
    use chrono::DateTime;
    use serde_json::json;

    /// Family emoji: four people joined by zero-width joiners (U+200D), 25 bytes in UTF-8.
    /// Written with escapes so the invisible joiners are explicit in the source.
    const FAMILY: &str = "\u{1F468}\u{200D}\u{1F469}\u{200D}\u{1F467}\u{200D}\u{1F466}";

    #[test]
    fn rand_number_returns_value_in_range() {
        let v: u8 = rand_number(3..10);
        assert!((3..10).contains(&v));
    }

    #[test]
    fn rfc3339_datetime_now_is_valid_rfc3339() {
        let now = rfc3339_datetime_now();
        let parsed = DateTime::parse_from_rfc3339(&now).expect("must be valid rfc3339 datetime");
        assert_eq!(parsed.offset().local_minus_utc(), 0);
    }

    #[test]
    fn rfc3339_datetime_handles_valid_and_invalid_timestamp() {
        let invalid_ms = i64::MIN as u64;

        assert_eq!(
            rfc3339_datetime(0),
            Some("1970-01-01T00:00:00.000Z".to_string())
        );
        assert_eq!(rfc3339_datetime(invalid_ms), None);
    }

    #[test]
    fn json_set_unix_ms_timestamp_updates_only_objects() {
        let vals = vec![json!({"k": 1}), json!("txt"), json!(null)];
        let out = json_set_unix_ms_timestamp(vals, 1234);

        assert_eq!(out[0]["timestamp"], json!(1234));
        assert_eq!(out[0]["k"], json!(1));
        assert_eq!(out[1], json!("txt"));
        assert_eq!(out[2], json!(null));
    }

    #[test]
    fn json_convert_rfc3339_timestamp_converts_numeric_timestamp() {
        let vals = vec![
            json!({"timestamp": 0, "name": "a"}),
            json!({"timestamp": "bad"}),
            json!(42),
        ];
        let out = json_convert_rfc3339_timestamp(vals);

        assert_eq!(out[0]["timestamp"], json!("1970-01-01T00:00:00.000Z"));
        assert_eq!(out[0]["name"], json!("a"));
        assert_eq!(out[1]["timestamp"], json!("bad"));
        assert_eq!(out[2], json!(42));
    }

    #[test]
    fn local_date_hour_returns_expected_shape_and_none_for_invalid() {
        let invalid_ms = i64::MIN as u64;
        let s = local_date_hour(0).expect("epoch should produce local datetime");
        // Example: "1970-01-01 08AM +08:00"
        let parts: Vec<&str> = s.split(' ').collect();
        assert_eq!(parts.len(), 3, "unexpected shape: {s}");
        let (date, hour, offset) = (parts[0], parts[1], parts[2]);

        assert!(offset.starts_with('+') || offset.starts_with('-'));
        assert_eq!(offset.len(), 6);

        // The local calendar date of the epoch depends on the machine's time zone: zones behind
        // UTC are still on 1969-12-31 at that instant, all others are already on 1970-01-01.
        let expected_date = if offset.starts_with('-') {
            "1969-12-31"
        } else {
            "1970-01-01"
        };
        assert_eq!(date, expected_date, "unexpected local date in: {s}");

        assert_eq!(hour.len(), 4);
        assert!(hour.ends_with("AM") || hour.ends_with("PM"));
        assert!(hour[..2].chars().all(|c| c.is_ascii_digit()));

        assert_eq!(local_date_hour(invalid_ms), None);
    }

    #[test]
    fn grapheme_safe_cutoff_returns_cluster_boundaries() {
        // Whole string fits: the full length is returned.
        assert_eq!(grapheme_safe_cutoff("hello", 10), 5);
        assert_eq!(grapheme_safe_cutoff("", 0), 0);

        // A 2-byte codepoint is not split.
        assert_eq!(grapheme_safe_cutoff("héllo", 2), 1);

        // A base character plus combining mark (3 bytes) is kept whole or dropped entirely.
        let combined = "e\u{301}x";
        assert_eq!(combined.len(), 4);
        assert_eq!(grapheme_safe_cutoff(combined, 1), 0);
        assert_eq!(grapheme_safe_cutoff(combined, 2), 0);
        assert_eq!(grapheme_safe_cutoff(combined, 3), 3);

        // ZWJ sequences are treated as one cluster.
        assert_eq!(grapheme_safe_cutoff(&FAMILY.repeat(2), 30), 25);
        assert_eq!(grapheme_safe_cutoff(FAMILY, 10), 0);
    }

    #[test]
    fn truncate_utf8_to_max_bytes_respects_budget_and_grapheme_boundaries() {
        // Within budget: untouched.
        let mut text = "hello".to_string();
        assert_eq!(truncate_utf8_to_max_bytes(&mut text, 5), None);
        assert_eq!(text, "hello");

        // ASCII over budget: truncated exactly to the byte budget.
        let mut text = "hello world".to_string();
        assert_eq!(truncate_utf8_to_max_bytes(&mut text, 5), Some(5));
        assert_eq!(text, "hello");

        // A multibyte codepoint is never split: "héllo" is 6 bytes (é = 2 bytes). A 2-byte budget
        // backs off to the boundary after "h".
        let mut text = "héllo".to_string();
        assert_eq!(truncate_utf8_to_max_bytes(&mut text, 2), Some(1));
        assert_eq!(text, "h");

        // A multi-codepoint grapheme cluster (family emoji joined by ZWJ, 25 bytes) is never split.
        // A budget that lands mid-cluster backs off to the previous cluster boundary.
        assert_eq!(FAMILY.len(), 25);
        let mut text = FAMILY.repeat(3); // 75 bytes
        let cutoff = truncate_utf8_to_max_bytes(&mut text, 60).unwrap();
        assert_eq!(cutoff, 50);
        assert_eq!(text, FAMILY.repeat(2));

        // Budget smaller than the first cluster: truncated to empty rather than split.
        let mut text = FAMILY.to_string();
        assert_eq!(truncate_utf8_to_max_bytes(&mut text, 10), Some(0));
        assert!(text.is_empty());
    }
}
246}