Skip to main content

sttp_core_rs/parsing/
sttp_node_parser.rs

1use std::collections::HashMap;
2
3use chrono::{DateTime, Utc};
4use once_cell::sync::Lazy;
5use regex::{Captures, Regex};
6
7use crate::domain::models::{AvecState, ParseResult, SttpNode};
8
9static TIMESTAMP_RX: Lazy<Regex> = Lazy::new(|| {
10    Regex::new(r#"timestamp:\s*"(?P<v>[^"]+)""#).expect("timestamp regex must compile")
11});
12
13static TIER_RX: Lazy<Regex> = Lazy::new(|| {
14    Regex::new(r"(?i)tier:\s*(?P<v>raw|daily|weekly|monthly|quarterly|yearly)")
15        .expect("tier regex must compile")
16});
17
18static COMPRESSION_DEPTH_RX: Lazy<Regex> = Lazy::new(|| {
19    Regex::new(r"compression_depth:\s*(?P<v>\d+)").expect("compression_depth regex must compile")
20});
21
22static PARENT_NODE_RX: Lazy<Regex> = Lazy::new(|| {
23    Regex::new(
24        r#"parent_node:\s*(?:ref:(?P<ref>[^,\s}\]]+)|"(?P<quoted>[^"]+)"|(?P<null>null))"#,
25    )
26    .expect("parent regex must compile")
27});
28
29static AVEC_RX: Lazy<Regex> = Lazy::new(|| {
30    Regex::new(
31        r"(?s)(?P<name>user_avec|model_avec|compression_avec)\s*:\s*\{\s*stability\s*:\s*(?P<stability>[\d.]+),\s*friction\s*:\s*(?P<friction>[\d.]+),\s*logic\s*:\s*(?P<logic>[\d.]+),\s*autonomy\s*:\s*(?P<autonomy>[\d.]+)(?:,\s*psi\s*:\s*(?P<psi>[\d.]+))?\s*\}",
32    )
33    .expect("avec regex must compile")
34});
35
36static RHO_RX: Lazy<Regex> =
37    Lazy::new(|| Regex::new(r"rho:\s*(?P<v>[\d.]+)").expect("rho regex must compile"));
38static KAPPA_RX: Lazy<Regex> =
39    Lazy::new(|| Regex::new(r"kappa:\s*(?P<v>[\d.]+)").expect("kappa regex must compile"));
40static PSI_RX: Lazy<Regex> = Lazy::new(|| {
41    Regex::new(r"(?s)⍉⟨.*?psi:\s*(?P<v>[\d.]+)").expect("psi regex must compile")
42});
43
/// Stateless, regex-driven parser for raw STTP node text.
///
/// The type is a unit struct, so values are free to copy and carry no
/// configuration.
#[derive(Clone, Copy, Debug, Default)]
pub struct SttpNodeParser;
46
47impl SttpNodeParser {
48    pub fn new() -> Self {
49        Self
50    }
51
52    pub fn try_parse(&self, raw: &str, session_id: &str) -> ParseResult {
53        let metrics_block = extract_metrics_block(raw);
54
55        let mut avec_map: HashMap<String, AvecState> = HashMap::new();
56        for caps in AVEC_RX.captures_iter(raw) {
57            if let Some(name) = caps.name("name") {
58                avec_map.insert(name.as_str().to_string(), parse_avec(&caps));
59            }
60        }
61
62        if let Some(caps) = AVEC_RX.captures(metrics_block) {
63            if caps.name("name").map(|m| m.as_str()) == Some("compression_avec") {
64                avec_map.insert("compression_avec".to_string(), parse_avec(&caps));
65            }
66        }
67
68        let node = SttpNode {
69            raw: raw.to_string(),
70            session_id: session_id.to_string(),
71            tier: TIER_RX
72                .captures(raw)
73                .and_then(|c| c.name("v"))
74                .map(|m| m.as_str().to_string())
75                .unwrap_or_default(),
76            timestamp: parse_timestamp(raw),
77            compression_depth: parse_int(&COMPRESSION_DEPTH_RX, raw),
78            parent_node_id: parse_parent_node(raw),
79            sync_key: String::new(),
80            updated_at: Utc::now(),
81            source_metadata: None,
82            user_avec: avec_map
83                .get("user_avec")
84                .copied()
85                .unwrap_or_else(AvecState::zero),
86            model_avec: avec_map
87                .get("model_avec")
88                .copied()
89                .unwrap_or_else(AvecState::zero),
90            compression_avec: Some(
91                avec_map
92                    .get("compression_avec")
93                    .copied()
94                    .unwrap_or_else(AvecState::zero),
95            ),
96            rho: parse_float(&RHO_RX, metrics_block),
97            kappa: parse_float(&KAPPA_RX, metrics_block),
98            psi: parse_float(&PSI_RX, metrics_block),
99        };
100
101        ParseResult::ok(node)
102    }
103}
104
105fn parse_avec(caps: &Captures<'_>) -> AvecState {
106    AvecState {
107        stability: parse_group_float(caps, "stability"),
108        friction: parse_group_float(caps, "friction"),
109        logic: parse_group_float(caps, "logic"),
110        autonomy: parse_group_float(caps, "autonomy"),
111    }
112}
113
114fn parse_timestamp(raw: &str) -> DateTime<Utc> {
115    let maybe_ts = TIMESTAMP_RX
116        .captures(raw)
117        .and_then(|c| c.name("v"))
118        .map(|m| m.as_str());
119
120    if let Some(ts) = maybe_ts {
121        if let Ok(parsed) = DateTime::parse_from_rfc3339(ts) {
122            return parsed.with_timezone(&Utc);
123        }
124    }
125
126    Utc::now()
127}
128
129fn parse_parent_node(raw: &str) -> Option<String> {
130    let caps = PARENT_NODE_RX.captures(raw)?;
131    if caps.name("null").is_some() {
132        return None;
133    }
134
135    if let Some(v) = caps.name("ref") {
136        return Some(v.as_str().to_string());
137    }
138
139    if let Some(v) = caps.name("quoted") {
140        return Some(v.as_str().to_string());
141    }
142
143    None
144}
145
146fn parse_int(rx: &Regex, raw: &str) -> i32 {
147    rx.captures(raw)
148        .and_then(|c| c.name("v"))
149        .and_then(|v| v.as_str().parse::<i32>().ok())
150        .unwrap_or(0)
151}
152
153fn parse_float(rx: &Regex, raw: &str) -> f32 {
154    rx.captures(raw)
155        .and_then(|c| c.name("v"))
156        .and_then(|v| v.as_str().parse::<f32>().ok())
157        .unwrap_or(0.0)
158}
159
160fn parse_group_float(caps: &Captures<'_>, group: &str) -> f32 {
161    caps.name(group)
162        .and_then(|v| v.as_str().parse::<f32>().ok())
163        .unwrap_or(0.0)
164}
165
/// Returns the tail of `raw` beginning at the first `⍉⟨` marker (marker
/// included), or the empty string when the marker does not occur.
fn extract_metrics_block(raw: &str) -> &str {
    // `find` yields the byte offset of the marker's first byte, which is a
    // char boundary, so slicing from there cannot panic.
    raw.find("⍉⟨").map_or("", |start| &raw[start..])
}